diff --git a/acinclude.m4 b/acinclude.m4 index 5a4b1df370..b7cb7fb1a8 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -67,9 +67,7 @@ m4_include(config/ompi_check_icc.m4) m4_include(config/ompi_check_gm.m4) m4_include(config/ompi_check_mx.m4) m4_include(config/ompi_check_alps.m4) -m4_include(config/ompi_check_bproc.m4) m4_include(config/ompi_check_lsf.m4) -m4_include(config/ompi_check_xcpu.m4) m4_include(config/ompi_check_openib.m4) m4_include(config/ompi_check_portals.m4) m4_include(config/ompi_check_psm.m4) diff --git a/config/ompi_check_bproc.m4 b/config/ompi_check_bproc.m4 deleted file mode 100644 index 32dc951f89..0000000000 --- a/config/ompi_check_bproc.m4 +++ /dev/null @@ -1,67 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# new bproc is LANL versions >= 3.2.0 -# old bproc is all Scyld versions and LANL version < 3.2.0 -# OMPI_CHECK_BPROC(prefix, [action-if-new-bproc], [action-if-old-bproc], -# [action-if-not-found]) -# -------------------------------------------------------- -AC_DEFUN([OMPI_CHECK_BPROC],[ - AC_ARG_WITH([bproc], - [AC_HELP_STRING([--with-bproc], - [Directory where the BProc software is installed])]) - - AS_IF([test ! -z "$with_bproc" -a "$with_bproc" = "no"],[$4], [ - ompi_check_bproc_save_CPPFLAGS="$CPPFLAGS" - ompi_check_bproc_save_LDFLAGS="$LDFLAGS" - ompi_check_bproc_save_LIBS="$LIBS" - - AS_IF([test ! 
-z "$with_bproc" -a "$with_bproc" != "yes"], - [CPPFLAGS="$CPPFLAGS -I$with_bproc/include" - LDFLAGS="$LDFLAGS -L$with_bproc/lib"]) - AC_CHECK_HEADERS([sys/bproc.h], - [AC_CHECK_LIB([bproc], - [bproc_numnodes], - [ompi_check_bproc_happy="yes"], - [ompi_check_bproc_happy="no"])], - [ompi_check_bproc_happy="no"]) - - # Check for Scyld bproc or an old version of LANL Bproc (pre 3.2.0) - AS_IF([test "$ompi_check_bproc_happy" = "yes"], - [AC_CHECK_HEADERS([sys/bproc_common.h],[ompi_check_bproc_happy="new"], - [ompi_check_bproc_happy="old"], - [#include - #include ])]) - - CPPFLAGS="$ompi_check_bproc_save_CPPFLAGS" - LDFLAGS="$ompi_check_bproc_save_LDFLAGS" - LIBS="$ompi_check_bproc_save_LIBS" - - AS_IF([test "$ompi_check_bproc_happy" != "no"], - [AS_IF([test ! -z "$with_bproc" -a "$with_bproc" != "yes"], - [$1_CPPFLAGS="$$1_CPPFLAGS -I$with_bproc/include" - $1_LDFLAGS="$$1_LDFLAGS -L$with_bproc/lib"]) - $1_LIBS="$$1_LIBS -lbproc" - AS_IF([test "$ompi_check_bproc_happy" = "new"], [$2], [$3])], - [AS_IF([test ! -z "$with_bproc"], - [AC_MSG_ERROR([BProc support request but not found. Perhaps -you need to specify the location of the BProc libraries.])]) - $4]) - ]) -]) diff --git a/config/ompi_check_xcpu.m4 b/config/ompi_check_xcpu.m4 deleted file mode 100644 index 0c58f2b97e..0000000000 --- a/config/ompi_check_xcpu.m4 +++ /dev/null @@ -1,63 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -# OMPI_CHECK_XCPU(prefix, [action-if-found], [action-if-not-found]) -# -------------------------------------------------------- -AC_DEFUN([OMPI_CHECK_XCPU],[ - AC_ARG_WITH([xcpu], - [AC_HELP_STRING([--with-xcpu], - [=yes will Build XCPU launcher component (default: no)])]) - - AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" = "no"],[$3], [ - ompi_check_xcpu_save_CPPFLAGS="$CPPFLAGS" - ompi_check_xcpu_save_LDFLAGS="$LDFLAGS" - ompi_check_xcpu_save_LIBS="$LIBS" - - AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" != "yes"], - [CPPFLAGS="$CPPFLAGS -I$with_xcpu/include" - LDFLAGS="$LDFLAGS -L$with_xcpu/lib"]) - - AC_CHECK_HEADERS([libxcpu.h], - [AC_CHECK_LIB([xcpu], - [xp_command_create], - [ompi_check_xcpu_happy="yes"], - [ompi_check_xcpu_happy="no"], - [-lstrutil -lspclient -lspfs -lelf])], - [ompi_check_xcpu_happy="no"], - [#include - #include - #include - #include ]) - - CPPFLAGS="$ompi_check_xcpu_save_CPPFLAGS" - LDFLAGS="$ompi_check_xcpu_save_LDFLAGS" - LIBS="$ompi_check_xcpu_save_LIBS" - - AS_IF([test "$ompi_check_xcpu_happy" != "no"], - [AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" != "yes"], - [$1_CPPFLAGS="$$1_CPPFLAGS -I$with_xcpu/include" - $1_LDFLAGS="$$1_LDFLAGS -L$with_xcpu/lib"]) - $1_LIBS="$$1_LIBS -lxcpu -lstrutil -lspclient -lspfs -lelf" $2], - [AS_IF([test ! -z "$with_xcpu"], - [AC_MSG_ERROR([Xcpu support request but not found. 
Perhaps -you need to specify the location of the Xcpu libraries.])]) - $3]) - ]) -]) diff --git a/config/ompi_configure_options.m4 b/config/ompi_configure_options.m4 index d7733865e2..8e5ddbb0de 100644 --- a/config/ompi_configure_options.m4 +++ b/config/ompi_configure_options.m4 @@ -546,6 +546,25 @@ AC_DEFINE_UNQUOTED([ORTE_ENABLE_JUMBO_APPS], [$orte_want_jumbo_apps], [Enable support for applications in excess of 32K processes and/or 32K jobs, or running on clusters in excess of 32k nodes]) +# +# Minimal RTE support +# + +AC_MSG_CHECKING([if want full RTE support]) +AC_ARG_ENABLE([rte-support], + [AC_HELP_STRING([--disable-rte-support], + [Disable RTE support for systems that do not require it (default: full RTE support enabled)])]) +if test "$enable_rte_support" = "no"; then + AC_MSG_RESULT([no]) + orte_disable_full_support=1 +else + AC_MSG_RESULT([yes]) + orte_disable_full_support=0 +fi +AC_DEFINE_UNQUOTED([ORTE_DISABLE_FULL_SUPPORT], [$orte_disable_full_support], + [Enable full RTE support]) +AM_CONDITIONAL(ORTE_DISABLE_FULL_SUPPORT, test "$enable_rte_support" = "no") + # # Cross-compile data # diff --git a/config/ompi_mca.m4 b/config/ompi_mca.m4 index 56a8bcf085..1945e9df4c 100644 --- a/config/ompi_mca.m4 +++ b/config/ompi_mca.m4 @@ -50,7 +50,7 @@ AC_DEFUN([OMPI_MCA],[ AC_ARG_ENABLE([mca-no-build], [AC_HELP_STRING([--enable-mca-no-build=LIST], [Comma-separated list of - pairs - that will not be built. Example: "--enable-mca-no-build=maffinity-libnuma,btl-portals" will disable building both the "libnuma" maffinity and "portals" btl components.])]) + that will not be built.
Example: "--enable-mca-no-build=maffinity,btl-portals" will disable building all maffinity components and the "portals" btl components.])]) AC_ARG_ENABLE(mca-dso, AC_HELP_STRING([--enable-mca-dso=LIST], [Comma-separated list of types and/or @@ -88,9 +88,13 @@ AC_DEFUN([OMPI_MCA],[ for item in $enable_mca_no_build; do type="`echo $item | cut -s -f1 -d-`" comp="`echo $item | cut -s -f2- -d-`" - if test -z $type -o -z $comp ; then - AC_MSG_ERROR([*** The enable-no-build flag requires a -*** list of type-component pairs. Invalid input detected.]) + if test -z "$type" ; then + type=$item + fi + if test -z "$comp" ; then + str="`echo DISABLE_${type}=1 | sed s/-/_/g`" + eval $str + msg="$item $msg" else str="`echo DISABLE_${type}_${comp}=1 | sed s/-/_/g`" eval $str @@ -359,9 +363,13 @@ AC_DEFUN([MCA_CONFIGURE_FRAMEWORK],[ # abort with a reasonable message. m4_ifdef([mca_$2_no_config_component_list], [], [m4_fatal([Could not find mca_$2_no_config_component_list - rerun autogen.sh without -l])]) + # make sure priority stuff set right m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST], [m4_ifval(mca_$2_no_config_component_list, [m4_fatal([Framework $2 using STOP_AT_FIRST but at least one component has no configure.m4])])]) + m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST_PRIORITY], + [m4_ifval(mca_$2_no_config_component_list, + [m4_fatal([Framework $2 using STOP_AT_FIRST_PRIORITY but at least one component has no configure.m4])])]) m4_foreach(mca_component, [mca_$2_no_config_component_list], [m4_ifval(mca_component, [MCA_CONFIGURE_NO_CONFIG_COMPONENT($1, $2, mca_component, @@ -404,7 +412,7 @@ AC_DEFUN([MCA_CONFIGURE_FRAMEWORK],[ # It would be really hard to run these for "find first that # works", so we don't :) m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST], [], - [m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST], [], + [m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]),
[STOP_AT_FIRST_PRIORITY], [], [AS_IF([test "$3" != "0"], [MCA_CONFIGURE_ALL_CONFIG_COMPONENTS($1, $2, [all_components], [static_components], [dso_components], @@ -876,6 +884,11 @@ AC_DEFUN([MCA_COMPONENT_BUILD_CHECK],[ fi # if we were explicitly disabled, don't build :) + str="DISABLED_COMPONENT_CHECK=\$DISABLE_${framework}" + eval $str + if test "$DISABLED_COMPONENT_CHECK" = "1" ; then + want_component=0 + fi str="DISABLED_COMPONENT_CHECK=\$DISABLE_${framework}_$component" eval $str if test "$DISABLED_COMPONENT_CHECK" = "1" ; then diff --git a/configure.ac b/configure.ac index ae5a39db1c..ceb8bb8a41 100644 --- a/configure.ac +++ b/configure.ac @@ -1263,11 +1263,7 @@ AC_CONFIG_FILES([ orte/include/Makefile orte/etc/Makefile - orte/tools/orteboot/Makefile orte/tools/orted/Makefile - orte/tools/ortehalt/Makefile - orte/tools/ortekill/Makefile - orte/tools/orteprobe/Makefile orte/tools/orterun/Makefile orte/tools/wrappers/Makefile orte/tools/wrappers/ortecc-wrapper-data.txt @@ -1304,6 +1300,7 @@ AC_CONFIG_FILES([ ompi/tools/wrappers/mpif77-wrapper-data.txt ompi/tools/wrappers/mpif90-wrapper-data.txt ompi/tools/ortetools/Makefile + ompi/tools/ompi-server/Makefile test/Makefile test/event/Makefile diff --git a/contrib/platform/cray_xt3_romio b/contrib/platform/cray_xt3_romio index 63a4551495..ce36fb529c 100755 --- a/contrib/platform/cray_xt3_romio +++ b/contrib/platform/cray_xt3_romio @@ -6,7 +6,8 @@ enable_pretty_print_stacktrace=no enable_dlopen=no with_portals_config=redstorm with_memory_manager=none -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,gpr-proxy,gpr-replica,iof-svc,ns-proxy,oob-tcp,pls-rsh,ras-dash_host,ras-hostfile,ras-localhost,rds-hostfile,rds-resfile,rmaps-round_robin,rmgr-proxy,rmgr-urm,rml-oob,sds-env,sds-seed,sds-singleton,btl-sm,btl-self,btl-tcp,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,filem-rsh,grpcomm-basic 
+enable_mca_no_build=carto-file,maffinity,paffinity,timer,errmgr,iof,odls,oob,plm,ras,rmaps,rml,routed,filem,btl-sm,btl-self,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,libnbc,vt +enable_rte_support=no enable_heterogeneous=no enable_pty_support=no enable_mem_debug=no diff --git a/contrib/platform/cray_xt_cnl_romio b/contrib/platform/cray_xt_cnl_romio index 1cb85bacd6..10dd9c6580 100755 --- a/contrib/platform/cray_xt_cnl_romio +++ b/contrib/platform/cray_xt_cnl_romio @@ -1,3 +1,7 @@ +enable_mem_debug=no +enable_mem_profile=no +enable_debug=no +enable_debug_symbols=no enable_io_romio=yes enable_static=yes enable_shared=no @@ -6,12 +10,9 @@ enable_pretty_print_stacktrace=no enable_dlopen=no with_portals_config=cnl_modex with_memory_manager=none -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,pls-rsh,pml-dr,filem-rsh,grpcomm-cnos,pls-cnos,rmgr-cnos,rml-cnos,routed-cnos,sds-portals_utcp,sds-cnos +enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,ess-cnos,pml-dr,filem-rsh,grpcomm-cnos,rmgr-cnos,rml-cnos enable_heterogeneous=no enable_pty_support=no -enable_mem_debug=no -enable_mem_profile=no -enable_debug_symbols=no enable_binaries=yes ompi_cv_f77_sizeof_LOGICAL=${ompi_cv_f77_sizeof_LOGICAL=4} diff --git a/contrib/platform/lanl/roadrunner/debug b/contrib/platform/lanl/roadrunner/debug new file mode 100644 index 0000000000..134ddd3774 --- /dev/null +++ b/contrib/platform/lanl/roadrunner/debug @@ -0,0 +1,20 @@ +with_threads=no +enable_dlopen=no +enable_pty_support=no +with_tm=/opt/PBS +with_wrapper_cflags=-I/opt/panfs/include +LDFLAGS=-L/opt/PBS/lib64 +with_openib=/opt/ofed +with_io_romio_flags=--with-file-system=ufs+nfs+panfs +with_memory_manager=no +enable_mem_debug=yes +enable_mem_profile=no +enable_debug_symbols=yes +enable_binaries=yes +with_devel_headers=yes +enable_heterogeneous=yes +enable_picky=yes +enable_debug=yes +enable_shared=yes +enable_static=no +with_slurm=no diff --git
a/contrib/platform/lanl/roadrunner/openmpi-mca-params.conf b/contrib/platform/lanl/roadrunner/openmpi-mca-params.conf new file mode 100644 index 0000000000..b4da08167e --- /dev/null +++ b/contrib/platform/lanl/roadrunner/openmpi-mca-params.conf @@ -0,0 +1,64 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This is the default system-wide MCA parameters defaults file. +# Specifically, the MCA parameter "mca_param_files" defaults to a +# value of +# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" +# (this file is the latter of the two). So if the default value of +# mca_param_files is not changed, this file is used to set system-wide +# MCA parameters. This file can therefore be used to set system-wide +# default MCA parameters for all users. Of course, users can override +# these values if they want, but this file is an excellent location +# for setting system-specific MCA parameters for those users who don't +# know / care enough to investigate the proper values for them. + +# Note that this file is only applicable where it is visible (in a +# filesystem sense). Specifically, MPI processes each read this file +# during their startup to determine what default values for MCA +# parameters should be used. 
mpirun does not bundle up the values in +# this file from the node where it was run and send them to all nodes; +# the default value decisions are effectively distributed. Hence, +# these values are only applicable on nodes that "see" this file. If +# $sysconf is a directory on a local disk, it is likely that changes +# to this file will need to be propagated to other nodes. If $sysconf +# is a directory that is shared via a networked filesystem, changes to +# this file will be visible to all nodes that share this $sysconf. + +# The format is straightforward: one per line, mca_param_name = +# rvalue. Quoting is ignored (so if you use quotes or escape +# characters, they'll be included as part of the value). For example: + +# Disable run-time MPI parameter checking +# mpi_param_check = 0 + +# Note that the value "~/" will be expanded to the current user's home +# directory. For example: + +# Change component loading path +# component_path = /usr/local/lib/openmpi:~/my_openmpi_components + +# See "ompi_info --param all all" for a full listing of Open MPI MCA +# parameters available and their default values. 
+ +oob_tcp_if_include = ib0 +mpi_preconnect_oob = 1 +btl_sm_free_list_max = 768 +oob_tcp_connect_timeout = 600 +oob_tcp_if_include = ib0 diff --git a/contrib/platform/lanl/roadrunner/optimized b/contrib/platform/lanl/roadrunner/optimized new file mode 100644 index 0000000000..cbedace4af --- /dev/null +++ b/contrib/platform/lanl/roadrunner/optimized @@ -0,0 +1,18 @@ +with_threads=no +enable_dlopen=no +enable_pty_support=no +with_tm=/opt/PBS +LDFLAGS=-L/opt/PBS/lib64 +with_openib=/opt/ofed +with_memory_manager=yes +enable_mem_debug=no +enable_mem_profile=no +enable_debug_symbols=no +enable_binaries=yes +with_devel_headers=no +enable_heterogeneous=yes +enable_debug=no +enable_shared=yes +with_wrapper_cflags=-I/opt/panfs/include +with_io_romio_flags=--with-file-system=ufs+nfs+panfs +with_slurm=no diff --git a/contrib/platform/portals-ref-rte b/contrib/platform/portals-ref-rte index 4a5af66056..8b60362bc4 100644 --- a/contrib/platform/portals-ref-rte +++ b/contrib/platform/portals-ref-rte @@ -10,13 +10,9 @@ enable_dlopen=no with_portals_config=utcp with_memory_manager=no enable_heterogeneous=no -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,gpr-proxy,gpr-replica,rml-oob,btl-sm,mpool-sm,btl-self,ns-proxy,rds-resfile,rds-hostfile,sds-env,sds-pipe,sds-seed,sds-singleton,coll-hierarch,coll-sm,pml-dr,btl-tcp,oob-tcp,ras-dash_host,ras-hostfile,ras-localhost,rmaps-round_robin,rmgr-urm,rmgr-proxy,pls-fork,pls-rsh,common-sm,iof-svc -with_slurm=no -with_bproc=no -with_mvapi=no +enable_mca_no_build=maffinity,paffinity,timer,errmgr,iof,odls,oob,plm,ras,rmaps,rml,routed,btl-sm,mpool-sm,btl-self,coll-hierarch,coll-sm,pml-dr,btl-tcp,common-sm +enable_rte_support=no with_openib=no with_gm=no with_mx=no -with_rml_cnos=utcp -with_rmgr_cnos=utcp enable_binaries=no diff --git a/contrib/platform/ps3 b/contrib/platform/ps3 index 8cb8085af3..80dd0ebf6d 100644 --- a/contrib/platform/ps3 +++ b/contrib/platform/ps3 @@ -5,7 +5,7 @@ with_threads=no
enable_pretty_print_stacktrace=no enable_dlopen=no with_memory_manager=none -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,allocator-basic,rcache-vma,pls-gridengine,pls-slurm,ras-slurm,ras-gridengine,btl-sm,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,pml-cm,mpool-rdma,osc-rdma,sds-slurm,backtrace-darwin,memory-darwin,memory-malloc_hook,memory_ptmalloc2,paffinity-solaris,paffinity-windows,timer-aix,timer-altix,timer-darwin,timer-solaris,timer-windows +enable_mca_no_build=maffinity,paffinity,timer,allocator-basic,rcache-vma,plm-gridengine,plm-slurm,ras-slurm,ras-gridengine,btl-sm,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,pml-cm,mpool-rdma,osc-rdma,ess-slurm,backtrace-darwin,memory-darwin,memory-malloc_hook,memory-ptmalloc2 enable_heterogeneous=no enable_pty_support=no enable_mem_debug=no diff --git a/contrib/platform/redstorm b/contrib/platform/redstorm index 4d3937bf92..dff54a7d39 100644 --- a/contrib/platform/redstorm +++ b/contrib/platform/redstorm @@ -6,7 +6,8 @@ enable_pretty_print_stacktrace=no enable_dlopen=no with_portals_config=redstorm with_memory_manager=none -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,gpr-proxy,gpr-replica,iof-svc,ns-proxy,oob-tcp,pls-rsh,ras-dash_host,ras-hostfile,ras-localhost,rds-hostfile,rds-resfile,rmaps-round_robin,rmgr-proxy,rmgr-urm,rml-oob,sds-env,sds-seed,sds-singleton,btl-sm,btl-self,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,filem-rsh,grpcomm-basic +enable_mca_no_build=carto-file,maffinity,paffinity,timer,errmgr,iof,odls,oob,plm,ras,rmaps,rml,routed,filem,btl-sm,btl-self,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,libnbc,vt +enable_rte_support=no enable_heterogeneous=no enable_pty_support=no enable_mem_debug=no diff --git a/ompi/communicator/Makefile.am b/ompi/communicator/Makefile.am index 671630f62a..7eb0ddb8d3 100644 --- a/ompi/communicator/Makefile.am +++ b/ompi/communicator/Makefile.am @@ -25,6 +25,4 @@ headers += \
libmpi_la_SOURCES += \ communicator/comm_init.c \ communicator/comm.c \ - communicator/comm_cid.c \ - communicator/comm_dyn.c \ - communicator/comm_publish.c + communicator/comm_cid.c diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 7205a4a412..ab37e8d86c 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -23,7 +23,10 @@ #include #include "ompi/constants.h" -#include "orte/dss/dss.h" + +#include "opal/dss/dss.h" +#include "orte/util/name_fns.h" + #include "ompi/proc/proc.h" #include "opal/threads/mutex.h" #include "opal/util/bit_ops.h" @@ -31,7 +34,7 @@ #include "opal/util/convert.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" -#include "orte/mca/ns/ns.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" @@ -139,7 +142,7 @@ int ompi_comm_set ( ompi_communicator_t **ncomm, /* Check how many different jobids are represented in this communicator. Necessary for the disconnect of dynamic communicators. 
*/ - ompi_comm_mark_dyncomm (newcomm); + ompi_dpm.mark_dyncomm (newcomm); /* Set error handler */ newcomm->error_handler = errh; @@ -1035,7 +1038,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, ompi_proc_t **rprocs=NULL; orte_std_cntr_t size_len; int int_len, rlen; - orte_buffer_t *sbuf=NULL, *rbuf=NULL; + opal_buffer_t *sbuf=NULL, *rbuf=NULL; void *sendbuf; char *recvbuf; ompi_proc_t **proc_list=NULL; @@ -1045,7 +1048,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, local_size = ompi_comm_size (local_comm); if (local_rank == local_leader) { - sbuf = OBJ_NEW(orte_buffer_t); + sbuf = OBJ_NEW(opal_buffer_t); if (NULL == sbuf) { rc = ORTE_ERROR; goto err_exit; @@ -1065,7 +1068,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, if ( OMPI_SUCCESS != rc ) { goto err_exit; } - if (ORTE_SUCCESS != (rc = orte_dss.unload(sbuf, &sendbuf, &size_len))) { + if (ORTE_SUCCESS != (rc = opal_dss.unload(sbuf, &sendbuf, &size_len))) { goto err_exit; } @@ -1131,13 +1134,13 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, goto err_exit; } - rbuf = OBJ_NEW(orte_buffer_t); + rbuf = OBJ_NEW(opal_buffer_t); if (NULL == rbuf) { rc = ORTE_ERROR; goto err_exit; } - if (ORTE_SUCCESS != (rc = orte_dss.load(rbuf, recvbuf, rlen))) { + if (ORTE_SUCCESS != (rc = opal_dss.load(rbuf, recvbuf, rlen))) { goto err_exit; } @@ -1250,7 +1253,7 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high ) theirproc = ompi_group_peer_lookup(intercomm->c_remote_group,0); mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; - rc = orte_ns.compare_fields(mask, &(ourproc->proc_name), &(theirproc->proc_name)); + rc = orte_util_compare_name_fields(mask, &(ourproc->proc_name), &(theirproc->proc_name)); if ( 0 > rc ) { flag = true; } @@ -1611,7 +1614,7 @@ static int ompi_comm_fill_rest (ompi_communicator_t *comm, /* verify whether to set the flag, that this comm contains process from more than one jobid. 
*/ - ompi_comm_mark_dyncomm (comm); + ompi_dpm.mark_dyncomm (comm); /* set the error handler */ comm->error_handler = errh; diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index 39348f5640..4c46d19b47 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -22,9 +22,9 @@ #include "ompi_config.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "opal/util/convert.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" #include "ompi/communicator/communicator.h" #include "ompi/proc/proc.h" #include "ompi/constants.h" @@ -35,10 +35,10 @@ #include "orte/mca/rml/rml.h" #include "ompi/request/request.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/mca/dpm/dpm.h" + +BEGIN_C_DECLS -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif /** * These functions make sure, that we determine the global result over * an intra communicators (simple), an inter-communicator and a @@ -774,26 +774,26 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf, } if (local_rank == local_leader ) { - orte_buffer_t *sbuf; - orte_buffer_t *rbuf; + opal_buffer_t *sbuf; + opal_buffer_t *rbuf; - sbuf = OBJ_NEW(orte_buffer_t); - rbuf = OBJ_NEW(orte_buffer_t); + sbuf = OBJ_NEW(opal_buffer_t); + rbuf = OBJ_NEW(opal_buffer_t); - if (ORTE_SUCCESS != (rc = orte_dss.pack(sbuf, tmpbuf, (orte_std_cntr_t)count, ORTE_INT))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(sbuf, tmpbuf, (orte_std_cntr_t)count, OPAL_INT))) { goto exit; } if ( send_first ) { - rc = orte_rml.send_buffer(remote_leader, sbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0); - rc = orte_rml.recv_buffer(remote_leader, rbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0); + rc = orte_rml.send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0); + rc = orte_rml.recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0); } else { - rc = orte_rml.recv_buffer(remote_leader, rbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0); - rc = 
orte_rml.send_buffer(remote_leader, sbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0); + rc = orte_rml.recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0); + rc = orte_rml.send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0); } - if (ORTE_SUCCESS != (rc = orte_dss.unpack(rbuf, outbuf, &size_count, ORTE_INT))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(rbuf, outbuf, &size_count, OPAL_INT))) { goto exit; } OBJ_RELEASE(sbuf); @@ -834,6 +834,5 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf, return (rc); } -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif + +END_C_DECLS diff --git a/ompi/communicator/comm_dyn.c b/ompi/communicator/comm_dyn.c deleted file mode 100644 index c766f2c8b3..0000000000 --- a/ompi/communicator/comm_dyn.c +++ /dev/null @@ -1,1069 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include -#ifdef HAVE_SYS_UIO_H -#include -#endif -#ifdef HAVE_NET_UIO_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ - -#include "opal/util/opal_environ.h" -#include "opal/util/printf.h" -#include "opal/util/convert.h" -#include "opal/threads/mutex.h" -#include "opal/util/bit_ops.h" -#include "opal/util/argv.h" - -#include "ompi/communicator/communicator.h" -#include "ompi/request/request.h" -#include "ompi/errhandler/errhandler.h" -#include "ompi/proc/proc.h" -#include "ompi/info/info.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/runtime/ompi_module_exchange.h" - -#include "orte/util/proc_info.h" -#include "orte/dss/dss.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ras/ras_types.h" -#include "orte/mca/rmaps/rmaps_types.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/smr/smr_types.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/grpcomm/grpcomm.h" - -#include "orte/runtime/runtime.h" - -static int ompi_comm_get_rport (orte_process_name_t *port, - int send_first, struct ompi_proc_t *proc, - orte_rml_tag_t tag, orte_process_name_t *rport); - - -int ompi_comm_connect_accept ( ompi_communicator_t *comm, int root, - orte_process_name_t *port, int send_first, - ompi_communicator_t **newcomm, orte_rml_tag_t tag ) -{ - int size, rsize, rank, rc; - orte_std_cntr_t num_vals; - orte_std_cntr_t rnamebuflen = 0; - int rnamebuflen_int = 0; - void *rnamebuf=NULL; - - ompi_communicator_t *newcomp=MPI_COMM_NULL; - ompi_proc_t **rprocs=NULL; - ompi_group_t *group=comm->c_local_group; - orte_process_name_t *rport=NULL, tmp_port_name; - orte_buffer_t *nbuf=NULL, *nrbuf=NULL; - ompi_proc_t **proc_list=NULL, **new_proc_list; - int i,j, 
new_proc_len; - ompi_group_t *new_group_pointer; - - size = ompi_comm_size ( comm ); - rank = ompi_comm_rank ( comm ); - - /* tell the progress engine to tick the event library more - often, to make sure that the OOB messages get sent */ - opal_progress_event_users_increment(); - - if ( rank == root ) { - /* The process receiving first does not have yet the contact - information of the remote process. Therefore, we have to - exchange that. - */ - - if(!OMPI_GROUP_IS_DENSE(group)) { - proc_list = (ompi_proc_t **) calloc (group->grp_proc_count, - sizeof (ompi_proc_t *)); - for(i=0 ; igrp_proc_count ; i++) - proc_list[i] = ompi_group_peer_lookup(group,i); - } - - if ( OMPI_COMM_JOIN_TAG != (int)tag ) { - if(OMPI_GROUP_IS_DENSE(group)){ - rc = ompi_comm_get_rport(port,send_first, - group->grp_proc_pointers[rank], tag, - &tmp_port_name); - } - else { - rc = ompi_comm_get_rport(port,send_first, - proc_list[rank], tag, - &tmp_port_name); - } - if (OMPI_SUCCESS != rc) { - return rc; - } - rport = &tmp_port_name; - } else { - rport = port; - } - - /* Generate the message buffer containing the number of processes and the list of - participating processes */ - nbuf = OBJ_NEW(orte_buffer_t); - if (NULL == nbuf) { - return OMPI_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(nbuf, &size, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - if(OMPI_GROUP_IS_DENSE(group)) { - ompi_proc_pack(group->grp_proc_pointers, size, nbuf); - } - else { - ompi_proc_pack(proc_list, size, nbuf); - } - - nrbuf = OBJ_NEW(orte_buffer_t); - if (NULL == nrbuf ) { - rc = OMPI_ERROR; - goto exit; - } - - /* Exchange the number and the list of processes in the groups */ - if ( send_first ) { - rc = orte_rml.send_buffer(rport, nbuf, tag, 0); - rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); - } else { - rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); - rc = orte_rml.send_buffer(rport, nbuf, tag, 0); - } - - if (ORTE_SUCCESS != (rc = orte_dss.unload(nrbuf, &rnamebuf, &rnamebuflen))) { - 
ORTE_ERROR_LOG(rc); - goto exit; - } - } - - /* First convert the size_t to an int so we can cast in the bcast to a void * - * if we don't then we will get badness when using big vs little endian - * THIS IS NO LONGER REQUIRED AS THE LENGTH IS NOW A STD_CNTR_T, WHICH - * CORRELATES TO AN INT32 - */ - rnamebuflen_int = (int)rnamebuflen; - - /* bcast the buffer-length to all processes in the local comm */ - rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm, - comm->c_coll.coll_bcast_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - rnamebuflen = rnamebuflen_int; - - if ( rank != root ) { - /* non root processes need to allocate the buffer manually */ - rnamebuf = (char *) malloc(rnamebuflen); - if ( NULL == rnamebuf ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - /* bcast list of processes to all procs in local group - and reconstruct the data. Note that proc_get_proclist - adds processes, which were not known yet to our - process pool. - */ - rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm, - comm->c_coll.coll_bcast_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - nrbuf = OBJ_NEW(orte_buffer_t); - if (NULL == nrbuf) { - goto exit; - } - if ( ORTE_SUCCESS != ( rc = orte_dss.load(nrbuf, rnamebuf, rnamebuflen))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - num_vals = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(nrbuf, &rsize, &num_vals, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - rc = ompi_proc_unpack(nrbuf, rsize, &rprocs, &new_proc_len, &new_proc_list); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - /* If we added new procs, we need to do the modex and then call - PML add_procs */ - if (new_proc_len > 0) { - opal_list_t all_procs; - orte_namelist_t *name; - orte_buffer_t mdx_buf, rbuf; - - OBJ_CONSTRUCT(&all_procs, opal_list_t); - - if (send_first) { - for (i = 0 ; i < group->grp_proc_count ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = 
&(ompi_group_peer_lookup(group, i)->proc_name); - opal_list_append(&all_procs, &name->item); - } - - for (i = 0 ; i < rsize ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = &(rprocs[i]->proc_name); - opal_list_append(&all_procs, &name->item); - } - } else { - for (i = 0 ; i < rsize ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = &(rprocs[i]->proc_name); - opal_list_append(&all_procs, &name->item); - } - - for (i = 0 ; i < group->grp_proc_count ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = &(ompi_group_peer_lookup(group, i)->proc_name); - opal_list_append(&all_procs, &name->item); - } - } - - OBJ_CONSTRUCT(&mdx_buf, orte_buffer_t); - if (OMPI_SUCCESS != (rc = ompi_modex_get_my_buffer(&mdx_buf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - OBJ_CONSTRUCT(&rbuf, orte_buffer_t); - if (OMPI_SUCCESS != (rc = orte_grpcomm.allgather_list(&all_procs, - &mdx_buf, - &rbuf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - OBJ_DESTRUCT(&mdx_buf); - - if (OMPI_SUCCESS != (rc = ompi_modex_process_data(&rbuf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - OBJ_DESTRUCT(&rbuf); - - /* - while (NULL != (item = opal_list_remove_first(&all_procs))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&all_procs); - */ - - MCA_PML_CALL(add_procs(new_proc_list, new_proc_len)); - } - - OBJ_RELEASE(nrbuf); - if ( rank == root ) { - OBJ_RELEASE(nbuf); - } - - new_group_pointer=ompi_group_allocate(rsize); - if( NULL == new_group_pointer ) { - return MPI_ERR_GROUP; - } - - /* put group elements in the list */ - for (j = 0; j < rsize; j++) { - new_group_pointer->grp_proc_pointers[j] = rprocs[j]; - } /* end proc loop */ - - /* increment proc reference counters */ - ompi_group_increment_proc_count(new_group_pointer); - - /* set up communicator structure */ - rc = ompi_comm_set ( &newcomp, /* new comm */ - comm, /* old comm */ - group->grp_proc_count, /* local_size */ - NULL, /* local_procs */ - rsize, /* remote_size */ - NULL , /* remote_procs */ - NULL, /* attrs */ - 
comm->error_handler, /* error handler */ - NULL, /* topo component */ - group, /* local group */ - new_group_pointer /* remote group */ - ); - if ( NULL == newcomp ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - ompi_group_decrement_proc_count (new_group_pointer); - OBJ_RELEASE(new_group_pointer); - new_group_pointer = MPI_GROUP_NULL; - - /* allocate comm_cid */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - rport, /* remote leader */ - OMPI_COMM_CID_INTRA_OOB, /* mode */ - send_first ); /* send or recv first */ - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - /* activate comm and init coll-component */ - rc = ompi_comm_activate ( newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - rport, /* remote leader */ - OMPI_COMM_CID_INTRA_OOB, /* mode */ - send_first, /* send or recv first */ - 0); /* sync_flag */ - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - /* Question: do we have to re-start some low level stuff - to enable the usage of fast communication devices - between the two worlds ? 
- */ - - - exit: - /* done with OOB and such - slow our tick rate again */ - opal_progress(); - opal_progress_event_users_decrement(); - - if ( NULL != rprocs ) { - free ( rprocs ); - } - if ( NULL != proc_list ) { - free ( proc_list ); - } - if ( OMPI_SUCCESS != rc ) { - if ( MPI_COMM_NULL != newcomp && NULL != newcomp ) { - OBJ_RETAIN(newcomp); - newcomp = MPI_COMM_NULL; - } - } - - *newcomm = newcomp; - return rc; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* - * This routine is necessary, since in the connect/accept case, the processes - * executing the connect operation have the OOB contact information of the - * leader of the remote group, however, the processes executing the - * accept get their own port_name = OOB contact information passed in as - * an argument. This is however useless. - * - * Therefore, the two root processes exchange this information at this - * point. 
- * - */ -int ompi_comm_get_rport(orte_process_name_t *port, int send_first, - ompi_proc_t *proc, orte_rml_tag_t tag, - orte_process_name_t *rport_name) -{ - int rc; - orte_std_cntr_t num_vals; - - if ( send_first ) { - orte_buffer_t *sbuf; - - sbuf = OBJ_NEW(orte_buffer_t); - if (NULL == sbuf) { - return OMPI_ERROR; - } - if (ORTE_SUCCESS != (rc = orte_dss.pack(sbuf, &(proc->proc_name), 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(sbuf); - return rc; - } - - rc = orte_rml.send_buffer(port, sbuf, tag, 0); - OBJ_RELEASE(sbuf); - if ( 0 > rc ) { - ORTE_ERROR_LOG(rc); - return rc; - } - - *rport_name = *port; - } else { - orte_buffer_t *rbuf; - - rbuf = OBJ_NEW(orte_buffer_t); - if (NULL == rbuf) { - return ORTE_ERROR; - } - if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, rbuf, tag, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rbuf); - return rc; - } - - num_vals = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(rbuf, rport_name, &num_vals, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rbuf); - return rc; - } - OBJ_RELEASE(rbuf); - } - - return OMPI_SUCCESS; -} - - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -int -ompi_comm_start_processes(int count, char **array_of_commands, - char ***array_of_argv, - int *array_of_maxprocs, - MPI_Info *array_of_info, - char *port_name) -{ - int rc, i, j, counter; - int have_wdir=0; - bool have_prefix; - int valuelen=OMPI_PATH_MAX, flag=0; - char cwd[OMPI_PATH_MAX]; - char host[OMPI_PATH_MAX]; /*** should define OMPI_HOST_MAX ***/ - char prefix[OMPI_PATH_MAX]; - char *base_prefix; - - orte_std_cntr_t num_apps, ai; - orte_jobid_t new_jobid=ORTE_JOBID_INVALID; - orte_app_context_t **apps=NULL; - - opal_list_t attributes; - opal_list_item_t *item; - - bool timing = false; - struct timeval ompistart, ompistop; - int param, value; - - 
/* parse the info object */ - /* check potentially for: - - "host": desired host where to spawn the processes - - "prefix": the path to the root of the directory tree where ompi - executables and libraries can be found - - "arch": desired architecture - - "wdir": directory, where executable can be found - - "path": list of directories where to look for the executable - - "file": filename, where additional information is provided. - - "soft": see page 92 of MPI-2. - */ - - /* make sure the progress engine properly trips the event library */ - opal_progress_event_users_increment(); - - /* check to see if we want timing information */ - param = mca_base_param_reg_int_name("ompi", "timing", - "Request that critical timing loops be measured", - false, false, 0, &value); - if (value != 0) { - timing = true; - if (0 != gettimeofday(&ompistart, NULL)) { - opal_output(0, "ompi_comm_start_procs: could not obtain start time"); - ompistart.tv_sec = 0; - ompistart.tv_usec = 0; - } - } - - /* setup to record the attributes */ - OBJ_CONSTRUCT(&attributes, opal_list_t); - - /* we want to be able to default the prefix to the one used for this job - * so that the ompi executables and libraries can be found. the user can - * later override this value by providing an MPI_Info value. for now, though, - * let's get the default value off the registry - */ - rc = orte_rmgr.get_app_context(orte_process_info.my_name->jobid, &apps, &num_apps); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* we'll just use the prefix from the first member of the app_context array. - * this shouldn't matter as they all should be the same. it could be NULL, of - * course (user might not have specified it), so we need to protect against that. 
- * - * It's possible that no app_contexts are returned (e.g., during a comm_spawn - * from a singleton), so check first - */ - if (NULL != apps && NULL != apps[0]->prefix_dir) { - base_prefix = strdup(apps[0]->prefix_dir); - } else { - base_prefix = NULL; - } - /* cleanup the memory we used */ - if(NULL != apps) { - for (ai = 0; ai < num_apps; ai++) { - OBJ_RELEASE(apps[ai]); - } - free(apps); - } - - /* Convert the list of commands to an array of orte_app_context_t - pointers */ - apps = (orte_app_context_t**)malloc(count * sizeof(orte_app_context_t *)); - if (NULL == apps) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - for (i = 0; i < count; ++i) { - apps[i] = OBJ_NEW(orte_app_context_t); - if (NULL == apps[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - /* rollback what was already done */ - for (j=0; j < i; j++) OBJ_RELEASE(apps[j]); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* copy over the name of the executable */ - apps[i]->app = strdup(array_of_commands[i]); - if (NULL == apps[i]->app) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - /* rollback what was already done */ - for (j=0; j < i; j++) OBJ_RELEASE(apps[j]); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* record the number of procs to be generated */ - apps[i]->num_procs = array_of_maxprocs[i]; - - /* copy over the argv array */ - counter = 1; - - if (MPI_ARGVS_NULL != array_of_argv && - MPI_ARGV_NULL != array_of_argv[i]) { - /* first need to find out how many entries there are */ - j=0; - while (NULL != array_of_argv[i][j]) { - j++; - } - counter += j; - } - - /* now copy them over, ensuring to NULL terminate the array */ - apps[i]->argv = (char**)malloc((1 + counter) * sizeof(char*)); - if (NULL == apps[i]->argv) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - /* rollback what was already done */ - for (j=0; j < i; j++) { - OBJ_RELEASE(apps[j]); - } - 
opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - apps[i]->argv[0] = strdup(array_of_commands[i]); - for (j=1; j < counter; j++) { - apps[i]->argv[j] = strdup(array_of_argv[i][j-1]); - } - apps[i]->argv[counter] = NULL; - - - /* the environment gets set by the launcher - * all we need to do is add the specific values - * needed for comm_spawn - */ - /* Add environment variable with the contact information for the - child processes. - */ - counter = 1; - apps[i]->env = (char**)malloc((1+counter) * sizeof(char*)); - if (NULL == apps[i]->env) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - /* rollback what was already done */ - for (j=0; j < i; j++) OBJ_RELEASE(apps[j]); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - asprintf(&(apps[i]->env[0]), "OMPI_PARENT_PORT=%s", port_name); - apps[i]->env[1] = NULL; - for (j = 0; NULL != environ[j]; ++j) { - if (0 == strncmp("OMPI_", environ[j], 5)) { - opal_argv_append_nosize(&apps[i]->env, environ[j]); - } - } - - /* Check for well-known info keys */ - have_wdir = 0; - have_prefix = false; - if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { - - /* check for 'wdir' */ - ompi_info_get (array_of_info[i], "wdir", valuelen, cwd, &flag); - if ( flag ) { - apps[i]->cwd = strdup(cwd); - have_wdir = 1; - } - - /* check for 'host' */ - ompi_info_get (array_of_info[i], "host", sizeof(host), host, &flag); - if ( flag ) { - apps[i]->num_map = 1; - apps[i]->map_data = (orte_app_context_map_t **) malloc(sizeof(orte_app_context_map_t *)); - apps[i]->map_data[0] = OBJ_NEW(orte_app_context_map_t); - apps[i]->map_data[0]->map_type = ORTE_APP_CONTEXT_MAP_HOSTNAME; - apps[i]->map_data[0]->map_data = strdup(host); - } - - /* 'path', 'arch', 'file', 'soft' -- to be implemented */ - - /* check for 'ompi_prefix' (OMPI-specific -- to effect the same - * behavior as --prefix option to orterun) - */ - ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix), prefix, 
&flag); - if ( flag ) { - apps[i]->prefix_dir = strdup(prefix); - have_prefix = true; - } - } - - /* default value: If the user did not tell us where to look for the - executable, we assume the current working directory */ - if ( !have_wdir ) { - getcwd(cwd, OMPI_PATH_MAX); - apps[i]->cwd = strdup(cwd); - } - - /* if the user told us a new prefix, then we leave it alone. otherwise, if - * a prefix had been provided before, copy that one into the new app_context - * for use by the spawned children - */ - if ( !have_prefix && NULL != base_prefix) { - apps[i]->prefix_dir = strdup(base_prefix); - } - - /* leave the map info alone - the launcher will - * decide where to put things - */ - } /* for (i = 0 ; i < count ; ++i) */ - - /* cleanup */ - if (NULL != base_prefix) { - free(base_prefix); - } - - /* tell the RTE that we want to be the new job to be a child of this process' job */ - if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_NS_USE_PARENT, - ORTE_JOBID, &(orte_process_info.my_name->jobid), - ORTE_RMGR_ATTR_OVERRIDE))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attributes); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - - /* tell the RTE that we want to the children to run inside of our allocation - - * don't go get one just for them - */ - if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RAS_USE_PARENT_ALLOCATION, - ORTE_JOBID, &(orte_process_info.my_name->jobid), - ORTE_RMGR_ATTR_OVERRIDE))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attributes); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - - /* tell the RTE that we want the children mapped the same way as their parent */ - if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMAPS_USE_PARENT_PLAN, - ORTE_JOBID, &(orte_process_info.my_name->jobid), - ORTE_RMGR_ATTR_OVERRIDE))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attributes); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - -#if 0 - /* tell 
the RTE that we want to be cross-connected to the children so we receive - * their ORTE-level information - e.g., OOB contact info - when they - * reach the STG1 stage gate - */ - state = ORTE_PROC_STATE_AT_STG1; - if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMGR_XCONNECT_AT_SPAWN, - ORTE_PROC_STATE, &state, - ORTE_RMGR_ATTR_OVERRIDE))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attributes); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } -#endif - - /* check for timing request - get stop time and report elapsed time if so */ - if (timing) { - if (0 != gettimeofday(&ompistop, NULL)) { - opal_output(0, "ompi_comm_start_procs: could not obtain stop time"); - } else { - opal_output(0, "ompi_comm_start_procs: time from start to prepare to spawn %ld usec", - (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + - (ompistop.tv_usec - ompistart.tv_usec))); - if (0 != gettimeofday(&ompistart, NULL)) { - opal_output(0, "ompi_comm_start_procs: could not obtain new start time"); - ompistart.tv_sec = ompistop.tv_sec; - ompistart.tv_usec = ompistop.tv_usec; - } - } - } - - /* spawn procs */ - rc = orte_rmgr.spawn_job(apps, count, &new_jobid, 0, NULL, NULL, - ORTE_PROC_STATE_NONE, &attributes); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - - /* check for timing request - get stop time and report elapsed time if so */ - if (timing) { - if (0 != gettimeofday(&ompistop, NULL)) { - opal_output(0, "ompi_comm_start_procs: could not obtain stop time"); - } else { - opal_output(0, "ompi_comm_start_procs: time to spawn %ld usec", - (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + - (ompistop.tv_usec - ompistart.tv_usec))); - } - } - - /* clean up */ - opal_progress_event_users_decrement(); - while (NULL != (item = opal_list_remove_first(&attributes))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&attributes); - - for ( i=0; ic_name, MPI_MAX_OBJECT_NAME, 
"MPI_COMM_PARENT"); - } - - return OMPI_SUCCESS; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* this routine runs through the list of communicators and - and does the disconnect for all dynamic communicators */ -int ompi_comm_dyn_finalize (void) -{ - int i,j=0, max=0; - ompi_comm_disconnect_obj **objs=NULL; - ompi_communicator_t *comm=NULL; - - if ( 1 size = ompi_comm_remote_size (comm); - } else { - obj->size = ompi_comm_size (comm); - } - - obj->comm = comm; - obj->reqs = (ompi_request_t **) malloc(2*obj->size*sizeof(ompi_request_t *)); - if ( NULL == obj->reqs ) { - free (obj); - return NULL; - } - - /* initiate all isend_irecvs. We use a dummy buffer stored on - the object, since we are sending zero size messages anyway. */ - for ( i=0; i < obj->size; i++ ) { - ret = MCA_PML_CALL(irecv (&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, comm, - &(obj->reqs[2*i]))); - - if ( OMPI_SUCCESS != ret ) { - free (obj->reqs); - free (obj); - return NULL; - } - - ret = MCA_PML_CALL(isend (&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, - MCA_PML_BASE_SEND_SYNCHRONOUS, - comm, &(obj->reqs[2*i+1]))); - - if ( OMPI_SUCCESS != ret ) { - free (obj->reqs); - free (obj); - return NULL; - } - } - - /* return handle */ - return obj; -} -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* - count how many requests are active - * - generate a request array large enough to hold - all active requests - * - call waitall on the overall request array - * - free the objects - */ -void ompi_comm_disconnect_waitall (int count, ompi_comm_disconnect_obj **objs) -{ - - ompi_request_t **reqs=NULL; - char *treq=NULL; - int totalcount = 
0; - int i; - int ret; - - for (i=0; isize; - } - - reqs = (ompi_request_t **) malloc (2*totalcount*sizeof(ompi_request_t *)); - if ( NULL == reqs ) { - printf("ompi_comm_disconnect_waitall: error allocating memory\n"); - return; - } - - /* generate a single, large array of pending requests */ - treq = (char *)reqs; - for (i=0; ireqs, 2*objs[i]->size * sizeof(ompi_request_t *)); - treq += 2*objs[i]->size * sizeof(ompi_request_t *); - } - - /* force all non-blocking all-to-alls to finish */ - ret = ompi_request_wait_all (2*totalcount, reqs, MPI_STATUSES_IGNORE); - - /* Finally, free everything */ - for (i=0; i< count; i++ ) { - if (NULL != objs[i]->reqs ) { - free (objs[i]->reqs ); - free (objs[i]); - } - } - - free (reqs); - - /* decrease the counter for dynamic communicators by 'count'. - Attention, this approach now requires, that we are just using - these routines for communicators which have been flagged dynamic */ - ompi_comm_num_dyncomm -=count; - - return; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -#define OMPI_COMM_MAXJOBIDS 64 -void ompi_comm_mark_dyncomm (ompi_communicator_t *comm) -{ - int i, j, numjobids=0; - int size, rsize; - int found; - orte_jobid_t jobids[OMPI_COMM_MAXJOBIDS], thisjobid; - ompi_group_t *grp=NULL; - ompi_proc_t *proc = NULL; - - /* special case for MPI_COMM_NULL */ - if ( comm == MPI_COMM_NULL ) { - return; - } - - size = ompi_comm_size (comm); - rsize = ompi_comm_remote_size(comm); - - /* loop over all processes in local group and count number - of different jobids. 
*/ - grp = comm->c_local_group; - for (i=0; i< size; i++) { - proc = ompi_group_peer_lookup(grp,i); - thisjobid = proc->proc_name.jobid; - found = 0; - for ( j=0; jc_remote_group; - for (i=0; i< rsize; i++) { - proc = ompi_group_peer_lookup(grp,i); - thisjobid = proc->proc_name.jobid; - found = 0; - for ( j=0; j 1 ) { - ompi_comm_num_dyncomm++; - OMPI_COMM_SET_DYNAMIC(comm); - } - - return; -} diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index ed43583081..348af04443 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -28,11 +28,11 @@ #include "ompi/mca/pml/pml.h" #include "ompi/mca/coll/base/base.h" #include "ompi/mca/topo/base/base.h" -#include "orte/mca/ns/base/base.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/attribute/attribute.h" #include "ompi/mca/topo/topo.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" /* @@ -210,7 +210,7 @@ int ompi_comm_finalize(void) OBJ_DESTRUCT( &ompi_mpi_comm_self ); /* disconnect all dynamic communicators */ - ompi_comm_dyn_finalize(); + ompi_dpm.dyn_finalize(); /* Shut down MPI_COMM_WORLD */ OBJ_DESTRUCT( &ompi_mpi_comm_world ); diff --git a/ompi/communicator/comm_publish.c b/ompi/communicator/comm_publish.c deleted file mode 100644 index bee7f9e7f0..0000000000 --- a/ompi/communicator/comm_publish.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include - -#include "ompi/communicator/communicator.h" -#include "ompi/proc/proc.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/rml/rml_types.h" - -#define OMPI_COMM_PORT_KEY "ompi-port-name" - - -int ompi_open_port(char *port_name) -{ - ompi_proc_t **myproc=NULL; - char *name=NULL; - size_t size=0; - orte_rml_tag_t lport_id=0; - int rc; - - /* - * The port_name is equal to the OOB-contact information - * and an integer. The reason for adding the integer is - * to make the port unique for multi-threaded scenarios. - */ - - myproc = ompi_proc_self (&size); - if (ORTE_SUCCESS != (rc = orte_ns.get_proc_name_string (&name, &(myproc[0]->proc_name)))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns.assign_rml_tag(&lport_id, NULL))) { - return rc; - } - - sprintf (port_name, "%s:%d", name, lport_id); - free ( myproc ); - free ( name ); - - return OMPI_SUCCESS; -} - -/* takes a port_name and separates it into the process_name - and the tag -*/ -char *ompi_parse_port (char *port_name, orte_rml_tag_t *tag) -{ - char tmp_port[MPI_MAX_PORT_NAME], *tmp_string; - - tmp_string = (char *) malloc (MPI_MAX_PORT_NAME); - if (NULL == tmp_string ) { - return NULL; - } - - strncpy (tmp_port, port_name, MPI_MAX_PORT_NAME); - strncpy (tmp_string, strtok(tmp_port, ":"), MPI_MAX_PORT_NAME); - sscanf( strtok(NULL, ":"),"%d", (int*)tag); - - return tmp_string; -} - -/* - * publish the port_name using the service_name as a token - * jobid and vpid are used later to make - * sure, that only this process can unpublish the information. 
- */ -int ompi_comm_namepublish ( char *service_name, char *port_name ) -{ - orte_gpr_value_t *value; - int rc; - - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_TOKENS_AND | ORTE_GPR_OVERWRITE, - OMPI_NAMESPACE_SEGMENT, 1, 1))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - value->tokens[0] = strdup(service_name); - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), OMPI_COMM_PORT_KEY, ORTE_STRING, port_name))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(value); - return rc; -} - -char* ompi_comm_namelookup ( char *service_name ) -{ - char *token[2], *key[2]; - orte_gpr_keyval_t **keyvals=NULL; - orte_gpr_value_t **values; - orte_std_cntr_t cnt=0; - char *stmp=NULL; - int ret; - - token[0] = service_name; - token[1] = NULL; - - key[0] = strdup(OMPI_COMM_PORT_KEY); - key[1] = NULL; - - ret = orte_gpr.get(ORTE_GPR_TOKENS_AND, OMPI_NAMESPACE_SEGMENT, - token, key, &cnt, &values); - if (ORTE_SUCCESS != ret) { - return NULL; - } - if ( 0 < cnt && NULL != values[0] ) { /* should be only one, if any */ - keyvals = values[0]->keyvals; - stmp = strdup((const char*)keyvals[0]->value->data); - OBJ_RELEASE(values[0]); - } - - return (stmp); -} - -/* - * delete the entry. Just the process who has published - * the service_name, has the right to remove this - * service. 
Will be done later, by adding jobid and vpid - * as tokens - */ -int ompi_comm_nameunpublish ( char *service_name ) -{ - char *token[2]; - - token[0] = service_name; - token[1] = NULL; -#if 0 - return orte_gpr.delete_entries(ORTE_GPR_TOKENS_AND, - OMPI_NAMESPACE_SEGMENT, - token, NULL); -#endif - return OMPI_SUCCESS; -} diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index 6401900f52..2a53bc95ed 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -65,7 +65,6 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); /* a set of special tags: */ /* to recognize an MPI_Comm_join in the comm_connect_accept routine. */ -#define OMPI_COMM_JOIN_TAG -32000 #define OMPI_COMM_ALLGATHER_TAG -31078 #define OMPI_COMM_BARRIER_TAG -31079 @@ -356,7 +355,7 @@ struct ompi_communicator_t { * the OOB version. * This routine has to be thread safe in the final version. */ - int ompi_comm_nextcid ( ompi_communicator_t* newcomm, +OMPI_DECLSPEC int ompi_comm_nextcid ( ompi_communicator_t* newcomm, ompi_communicator_t* oldcomm, ompi_communicator_t* bridgecomm, void* local_leader, @@ -373,7 +372,7 @@ struct ompi_communicator_t { * This is THE routine, where all the communicator stuff * is really set. 
*/ - int ompi_comm_set ( ompi_communicator_t** newcomm, +OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, ompi_communicator_t* oldcomm, int local_size, int *local_ranks, @@ -412,7 +411,7 @@ struct ompi_communicator_t { int high ); - int ompi_comm_activate ( ompi_communicator_t* newcomm, +OMPI_DECLSPEC int ompi_comm_activate ( ompi_communicator_t* newcomm, ompi_communicator_t* oldcomm, ompi_communicator_t* bridgecomm, void* local_leader, @@ -427,35 +426,9 @@ struct ompi_communicator_t { */ int ompi_comm_dump ( ompi_communicator_t *comm ); - /** - * a simple function to determint a port number - */ - int ompi_open_port (char *port_name); - - /** - * takes a port_name and returns the oob-contact information - * and the tag - */ - char * ompi_parse_port (char *port_name, orte_rml_tag_t *tag) ; - - /** - * routines handling name publishing, lookup and unpublishing - */ - int ompi_comm_namepublish ( char *service_name, char *port_name ); - char* ompi_comm_namelookup ( char *service_name ); - int ompi_comm_nameunpublish ( char *service_name ); - - /* setting name */ int ompi_comm_set_name (ompi_communicator_t *comm, char *name ); - /* THE routine for dynamic process management. This routine - sets the connection up between two independent applications. - */ - int ompi_comm_connect_accept ( ompi_communicator_t *comm, int root, - orte_process_name_t *port, int send_first, - ompi_communicator_t **newcomm, orte_rml_tag_t tag); - /* * these are the init and finalize functions for the comm_reg * stuff. These routines are necessary for handling multi-threading @@ -464,59 +437,9 @@ struct ompi_communicator_t { void ompi_comm_reg_init(void); void ompi_comm_reg_finalize(void); - /* start the new processes from MPI_Comm_spawn_multiple. 
Initial - * version, very rough - */ - int ompi_comm_start_processes(int count, char **array_of_commands, - char ***array_of_argv, - int *array_of_maxprocs, - MPI_Info *array_of_info, - char *port_name); - - /* - * This routine checks, whether an application has been spawned - * by another MPI application, or has been independently started. - * If it has been spawned, it establishes the parent communicator. - * Since the routine has to communicate, it should be among the last - * steps in MPI_Init, to be sure that everything is already set up. - */ - int ompi_comm_dyn_init(void); - - /** - * Executes internally a disconnect on all dynamic communicators - * in case the user did not disconnect them. - */ - int ompi_comm_dyn_finalize(void); - - /* this routine counts the number of different jobids of the processes - given in a certain communicator. If there is more than one jobid, - we mark the communicator as 'dynamic'. This is especially relevant - for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have - to wait for all still connected processes. */ + /* global variable to save the number od dynamic communicators */ extern int ompi_comm_num_dyncomm; - void ompi_comm_mark_dyncomm (ompi_communicator_t *comm); - /* the next two routines implement a kind of non-blocking barrier. - the only difference is, that you can wait for the completion - of more than one initiated ibarrier. This is required for waiting - for all still connected processes in MPI_Finalize. - - ompi_comm_disconnect_init returns a handle, which has to be passed in - to ompi_comm_disconnect_waitall. The second routine blocks, until - all non-blocking barriers described by the handles are finished. - The communicators can than be released. 
- */ - - struct ompi_comm_disconnect_obj { - ompi_communicator_t *comm; - int size; - struct ompi_request_t **reqs; - int buf; - }; - typedef struct ompi_comm_disconnect_obj ompi_comm_disconnect_obj; - - ompi_comm_disconnect_obj *ompi_comm_disconnect_init (ompi_communicator_t *comm); - void ompi_comm_disconnect_waitall (int count, ompi_comm_disconnect_obj **objs ); END_C_DECLS diff --git a/ompi/datatype/convertor.c b/ompi/datatype/convertor.c index 70daae2bc8..836ab11f44 100644 --- a/ompi/datatype/convertor.c +++ b/ompi/datatype/convertor.c @@ -24,6 +24,7 @@ #ifdef HAVE_STRINGS_H #include #endif +#include #include "opal/prefetch.h" diff --git a/ompi/datatype/datatype_unpack.c b/ompi/datatype/datatype_unpack.c index 17c9557af0..1884772e16 100644 --- a/ompi/datatype/datatype_unpack.c +++ b/ompi/datatype/datatype_unpack.c @@ -24,6 +24,8 @@ #include "ompi/datatype/convertor_internal.h" #include "ompi/datatype/datatype_internal.h" +#include + #if OMPI_ENABLE_DEBUG extern int ompi_unpack_debug; #define DO_DEBUG(INST) if( ompi_unpack_debug ) { INST } diff --git a/ompi/datatype/dt_module.c b/ompi/datatype/dt_module.c index c272186736..71619ca3a0 100644 --- a/ompi/datatype/dt_module.c +++ b/ompi/datatype/dt_module.c @@ -25,6 +25,7 @@ #include "ompi/datatype/datatype.h" #include "ompi/datatype/datatype_internal.h" #include "ompi/datatype/convertor_internal.h" +#include #if OMPI_ENABLE_DEBUG #include "opal/mca/base/mca_base_param.h" diff --git a/ompi/group/group.h b/ompi/group/group.h index cd0ad089e1..2fc98e9c7a 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -136,7 +136,7 @@ OMPI_DECLSPEC extern ompi_group_t ompi_mpi_group_null; * * @return Pointer to new group structure */ -ompi_group_t *ompi_group_allocate(int group_size); +OMPI_DECLSPEC ompi_group_t *ompi_group_allocate(int group_size); ompi_group_t *ompi_group_allocate_sporadic(int group_size); ompi_group_t *ompi_group_allocate_strided(void); ompi_group_t *ompi_group_allocate_bmap(int orig_group_size, int 
group_size); diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index 336f03f012..c86a72d370 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -286,7 +286,7 @@ typedef int (MPI_Grequest_cancel_function)(void *, int); #define MPI_ARGV_NULL ((char **) 0) /* NULL argument vector */ #define MPI_ARGVS_NULL ((char ***) 0) /* NULL argument vectors */ #define MPI_ERRCODES_IGNORE ((int *) 0) /* don't return error codes */ -#define MPI_MAX_PORT_NAME 36 /* max port name length */ +#define MPI_MAX_PORT_NAME 256 /* max port name length */ #define MPI_MAX_NAME_LEN MPI_MAX_PORT_NAME /* max port name length */ #define MPI_ORDER_C 0 /* C row major order */ #define MPI_ORDER_FORTRAN 1 /* Fortran column major order */ diff --git a/ompi/include/mpif-common.h b/ompi/include/mpif-common.h index 8042bb0554..adeab15dae 100644 --- a/ompi/include/mpif-common.h +++ b/ompi/include/mpif-common.h @@ -133,7 +133,7 @@ parameter (MPI_BSEND_OVERHEAD=128) parameter (MPI_MAX_INFO_KEY=35) parameter (MPI_MAX_INFO_VAL=255) - parameter (MPI_MAX_PORT_NAME=35) + parameter (MPI_MAX_PORT_NAME=255) parameter (MPI_MAX_OBJECT_NAME=63) parameter (MPI_ORDER_C=0) parameter (MPI_ORDER_FORTRAN=1) diff --git a/ompi/include/ompi/constants.h b/ompi/include/ompi/constants.h index 86f543f520..f4fb75b8aa 100644 --- a/ompi/include/ompi/constants.h +++ b/ompi/include/ompi/constants.h @@ -19,7 +19,7 @@ #ifndef OMPI_CONSTANTS_H #define OMPI_CONSTANTS_H -#include "orte/orte_constants.h" +#include "orte/constants.h" #define OMPI_ERR_BASE ORTE_ERR_MAX diff --git a/ompi/info/info.c b/ompi/info/info.c index 5385a7b3e5..37745771ce 100644 --- a/ompi/info/info.c +++ b/ompi/info/info.c @@ -10,6 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -27,6 +28,7 @@ #include #endif #include +#include #include "ompi/constants.h" #include "ompi/info/info.h" @@ -207,6 +209,46 @@ int ompi_info_get (ompi_info_t *info, char *key, int valuelen, } +/* + * Similar to ompi_info_get(), but cast the result into a boolean + * using some well-defined rules. + */ +int ompi_info_get_bool(ompi_info_t *info, char *key, bool *value, int *flag) +{ + char *ptr; + char str[256]; + + str[sizeof(str) - 1] = '\0'; + ompi_info_get(info, key, sizeof(str) - 1, str, flag); + if (*flag) { + *value = false; + + /* Trim whitespace */ + ptr = str + sizeof(str) - 1; + while (ptr >= str && isspace(*ptr)) { + *ptr = '\0'; + --ptr; + } + ptr = str; + while (ptr < str + sizeof(str) - 1 && *ptr != '\0' && + isspace(*ptr)) { + ++ptr; + } + if ('\0' != *ptr) { + if (isdigit(*ptr)) { + *value = (bool) atoi(ptr); + } else if (0 == strcasecmp(ptr, "yes") || + 0 == strcasecmp(ptr, "true")) { + *value = true; + } else if (0 != strcasecmp(ptr, "no") && + 0 != strcasecmp(ptr, "false")) { + /* RHC unrecognized value -- print a warning? */ + } + } + } + return MPI_SUCCESS; +} + /* * Delete a key from an info */ diff --git a/ompi/info/info.h b/ompi/info/info.h index c58452f55e..cc70e59b24 100644 --- a/ompi/info/info.h +++ b/ompi/info/info.h @@ -10,6 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -148,6 +149,32 @@ int ompi_info_set (ompi_info_t *info, char *key, char *value); */ int ompi_info_free (ompi_info_t **info); + /** + * Get a (key, value) pair from an 'MPI_Info' object and assign it + * into a boolen output. 
+ * + * @param info Pointer to ompi_info_t object + * @param key null-terminated character string of the index key + * @param value Boolean output value + * @param flag true (1) if 'key' defined on 'info', false (0) if not + * (logical) + * + * @retval MPI_SUCCESS + * + * If found, the string value will be cast to the boolen output in + * the following manner: + * + * - If the string value is digits, the return value is "(bool) + * atoi(value)" + * - If the string value is (case-insensitive) "yes" or "true", the + * result is true + * - If the string value is (case-insensitive) "no" or "false", the + * result is false + * - All other values are false + */ +OMPI_DECLSPEC int ompi_info_get_bool (ompi_info_t *info, char *key, bool *value, + int *flag); + /** * Get a (key, value) pair from an 'MPI_Info' object * @@ -163,8 +190,8 @@ int ompi_info_free (ompi_info_t **info); * In C and C++, 'valuelen' should be one less than the allocated * space to allow for for the null terminator. */ -int ompi_info_get (ompi_info_t *info, char *key, int valuelen, - char *value, int *flag); +OMPI_DECLSPEC int ompi_info_get (ompi_info_t *info, char *key, int valuelen, + char *value, int *flag); /** * Delete a (key,value) pair from "info" diff --git a/ompi/mca/bml/r2/bml_r2.c b/ompi/mca/bml/r2/bml_r2.c index c94bd3eaad..28120e3d59 100644 --- a/ompi/mca/bml/r2/bml_r2.c +++ b/ompi/mca/bml/r2/bml_r2.c @@ -24,7 +24,6 @@ #include #include "opal/util/show_help.h" -#include "orte/mca/ns/ns.h" #include "ompi/class/ompi_bitmap.h" #include "ompi/mca/bml/bml.h" #include "ompi/mca/bml/base/base.h" @@ -34,6 +33,7 @@ #include "ompi/mca/bml/base/bml_base_btl.h" #include "bml_r2.h" #include "orte/class/orte_proc_table.h" +#include "orte/util/name_fns.h" #include "ompi/proc/proc.h" extern mca_bml_base_component_t mca_bml_r2_component; @@ -450,9 +450,9 @@ int mca_bml_r2_add_procs( OMPI_ERR_UNREACH == ret) { char *local, *remote; - orte_ns.get_proc_name_string(&local, + 
orte_util_convert_process_name_to_string(&local, &(ompi_proc_local_proc->proc_name)); - orte_ns.get_proc_name_string(&remote, + orte_util_convert_process_name_to_string(&remote, &(unreach_proc->proc_name)); opal_show_help("help-mca-bml-r2", diff --git a/ompi/mca/bml/r2/bml_r2_ft.c b/ompi/mca/bml/r2/bml_r2_ft.c index 72ca634f01..8367d94ba1 100644 --- a/ompi/mca/bml/r2/bml_r2_ft.c +++ b/ompi/mca/bml/r2/bml_r2_ft.c @@ -24,7 +24,6 @@ #include #include "opal/util/show_help.h" -#include "orte/mca/ns/ns.h" #include "ompi/runtime/ompi_cr.h" #include "ompi/class/ompi_bitmap.h" #include "ompi/mca/bml/bml.h" @@ -35,9 +34,7 @@ #include "ompi/mca/bml/base/bml_base_btl.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/gpr/gpr.h" #include "orte/class/orte_proc_table.h" #include "ompi/proc/proc.h" @@ -117,9 +114,11 @@ int mca_bml_r2_ft_event(int state) { if( NULL != mca_bml_r2.btl_modules) { free( mca_bml_r2.btl_modules); + mca_bml_r2.btl_modules = NULL; } if( NULL != mca_bml_r2.btl_progress ) { free( mca_bml_r2.btl_progress); + mca_bml_r2.btl_progress = NULL; } opal_output_verbose(10, ompi_cr_output, @@ -163,8 +162,10 @@ int mca_bml_r2_ft_event(int state) { mca_bml_r2.btls_added = false; for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]->proc_bml); - procs[p]->proc_bml = NULL; + if( NULL != procs[p]->proc_bml) { + OBJ_RELEASE(procs[p]->proc_bml); + procs[p]->proc_bml = NULL; + } OBJ_RELEASE(procs[p]); } diff --git a/ompi/mca/btl/base/btl_base_error.c b/ompi/mca/btl/base/btl_base_error.c index 2caac992e6..efa06ca59c 100644 --- a/ompi/mca/btl/base/btl_base_error.c +++ b/ompi/mca/btl/base/btl_base_error.c @@ -25,8 +25,11 @@ #include "base.h" #include "btl_base_error.h" #include "opal/util/show_help.h" + #include "orte/util/sys_info.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" int 
mca_btl_base_verbose; @@ -60,7 +63,7 @@ void mca_btl_base_error_no_nics(const char* transport, char *procid; if (mca_btl_base_warn_component_unused) { /* print out no-nic warning if user told us to */ - asprintf(&procid, "%s", ORTE_NAME_PRINT(orte_process_info.my_name)); + asprintf(&procid, "%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_show_help("help-mpi-btl-base.txt", "btl:no-nics", true, procid, transport, orte_system_info.nodename, diff --git a/ompi/mca/btl/base/btl_base_error.h b/ompi/mca/btl/base/btl_base_error.h index 87d22c2172..42aab4195e 100644 --- a/ompi/mca/btl/base/btl_base_error.h +++ b/ompi/mca/btl/base/btl_base_error.h @@ -28,7 +28,8 @@ #include "orte/util/proc_info.h" #include "orte/util/sys_info.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" OMPI_DECLSPEC extern int mca_btl_base_verbose; @@ -39,7 +40,7 @@ extern int mca_btl_base_out(const char*, ...); do { \ mca_btl_base_out("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, __LINE__, __func__); \ mca_btl_base_out args; \ mca_btl_base_out("\n"); \ @@ -50,7 +51,7 @@ do { \ do { \ mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, __LINE__, __func__); \ mca_btl_base_err args; \ mca_btl_base_err("\n"); \ @@ -59,7 +60,7 @@ do { \ #define BTL_PEER_ERROR(proc, args) \ do { \ mca_btl_base_err("%s[%s:%d:%s] from %s ", \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, __LINE__, __func__, \ orte_system_info.nodename); \ if(proc && proc->proc_hostname) { \ @@ -76,7 +77,7 @@ do { \ if(mca_btl_base_verbose > 0) { \ mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, 
__LINE__, __func__); \ mca_btl_base_err args; \ mca_btl_base_err("\n"); \ @@ -89,13 +90,9 @@ do { \ #endif -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS OMPI_DECLSPEC extern void mca_btl_base_error_no_nics(const char* transport, const char* nic_name); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS diff --git a/ompi/mca/btl/base/btl_base_select.c b/ompi/mca/btl/base/btl_base_select.c index a8c1378608..49fc876972 100644 --- a/ompi/mca/btl/base/btl_base_select.c +++ b/ompi/mca/btl/base/btl_base_select.c @@ -154,7 +154,7 @@ int mca_btl_base_select(bool enable_progress_threads, if (0 == opal_list_get_size(&mca_btl_base_modules_initialized)) { opal_show_help("help-mca-base.txt", "find-available:none-found", true, "btl"); - orte_errmgr.error_detected(1, NULL); + orte_errmgr.abort(1, NULL); } return OMPI_SUCCESS; } diff --git a/ompi/mca/btl/elan/btl_elan_component.c b/ompi/mca/btl/elan/btl_elan_component.c index 56e79b205d..f90986387c 100644 --- a/ompi/mca/btl/elan/btl_elan_component.c +++ b/ompi/mca/btl/elan/btl_elan_component.c @@ -23,6 +23,9 @@ #include "ompi/runtime/ompi_module_exchange.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/runtime/orte_globals.h" +#include "ompi/mca/mpool/base/base.h" + #include "btl_elan.h" #include "btl_elan_frag.h" #include "btl_elan_endpoint.h" @@ -225,6 +228,8 @@ mca_btl_elan_component_init( int *num_btl_modules, mca_btl_elan_component.elan_free_list_inc, NULL ); /* use default allocator */ + vpid = ORTE_PROC_MY_NAME->vpid; + ompi_modex_send( &mca_btl_elan_component.super.btl_version, &vpid, sizeof(vpid)); diff --git a/ompi/mca/btl/elan/btl_elan_endpoint.c b/ompi/mca/btl/elan/btl_elan_endpoint.c index 6edb00cbe4..241005622a 100644 --- a/ompi/mca/btl/elan/btl_elan_endpoint.c +++ b/ompi/mca/btl/elan/btl_elan_endpoint.c @@ -14,11 +14,10 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include 
"orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "btl_elan.h" #include "btl_elan_endpoint.h" #include "btl_elan_proc.h" diff --git a/ompi/mca/btl/elan/btl_elan_proc.h b/ompi/mca/btl/elan/btl_elan_proc.h index 162be777dd..5a439b48fe 100644 --- a/ompi/mca/btl/elan/btl_elan_proc.h +++ b/ompi/mca/btl/elan/btl_elan_proc.h @@ -12,7 +12,6 @@ #ifndef MCA_BTL_ELAN_PROC_H #define MCA_BTL_ELAN_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_elan.h" diff --git a/ompi/mca/btl/gm/btl_gm_component.c b/ompi/mca/btl/gm/btl_gm_component.c index fc779ba8f8..42bdad47c4 100644 --- a/ompi/mca/btl/gm/btl_gm_component.c +++ b/ompi/mca/btl/gm/btl_gm_component.c @@ -44,6 +44,8 @@ #include "ompi/datatype/convertor.h" #include "btl_gm_endpoint.h" #include "orte/util/proc_info.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" #include "ompi/runtime/ompi_module_exchange.h" @@ -440,7 +442,7 @@ static int mca_btl_gm_discover( void ) "%s gm_port %08lX, " "board %" PRIu32 ", global %" PRIu32 " " "node %" PRIu32 "port %" PRIu32 "\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long) port, board_no, global_id, node_id, port_no); } diff --git a/ompi/mca/btl/gm/btl_gm_endpoint.c b/ompi/mca/btl/gm/btl_gm_endpoint.c index 8e4b74dbec..3d964a7433 100644 --- a/ompi/mca/btl/gm/btl_gm_endpoint.c +++ b/ompi/mca/btl/gm/btl_gm_endpoint.c @@ -21,11 +21,6 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" #include "btl_gm.h" #include "btl_gm_endpoint.h" #include "btl_gm_proc.h" diff --git a/ompi/mca/btl/gm/btl_gm_proc.c b/ompi/mca/btl/gm/btl_gm_proc.c index cb2b11d3cf..476fa4ddee 100644 --- a/ompi/mca/btl/gm/btl_gm_proc.c +++ 
b/ompi/mca/btl/gm/btl_gm_proc.c @@ -25,6 +25,9 @@ #include #endif +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "btl_gm.h" #include "btl_gm_proc.h" @@ -191,7 +194,7 @@ int mca_btl_gm_proc_insert( if(mca_btl_gm_component.gm_debug > 0) { opal_output(0, "%s mapped global id %" PRIu32 " to node id %" PRIu32 "\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), gm_endpoint->endpoint_addr.global_id, gm_endpoint->endpoint_addr.node_id); } diff --git a/ompi/mca/btl/gm/btl_gm_proc.h b/ompi/mca/btl/gm/btl_gm_proc.h index d3cd7fd470..ac23f28346 100644 --- a/ompi/mca/btl/gm/btl_gm_proc.h +++ b/ompi/mca/btl/gm/btl_gm_proc.h @@ -19,7 +19,6 @@ #ifndef MCA_BTL_GM_PROC_H #define MCA_BTL_GM_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_gm.h" diff --git a/ompi/mca/btl/mx/btl_mx_endpoint.c b/ompi/mca/btl/mx/btl_mx_endpoint.c index bc0a07068f..2b749f52ee 100644 --- a/ompi/mca/btl/mx/btl_mx_endpoint.c +++ b/ompi/mca/btl/mx/btl_mx_endpoint.c @@ -21,10 +21,6 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/errmgr/errmgr.h" #include "btl_mx.h" #include "btl_mx_endpoint.h" #include "btl_mx_proc.h" diff --git a/ompi/mca/btl/mx/btl_mx_proc.c b/ompi/mca/btl/mx/btl_mx_proc.c index dbd9535b4a..8fa0991231 100644 --- a/ompi/mca/btl/mx/btl_mx_proc.c +++ b/ompi/mca/btl/mx/btl_mx_proc.c @@ -19,6 +19,7 @@ #include "ompi_config.h" #include "opal/class/opal_hash_table.h" +#include "orte/util/name_fns.h" #include "ompi/runtime/ompi_module_exchange.h" #include "btl_mx.h" diff --git a/ompi/mca/btl/mx/btl_mx_proc.h b/ompi/mca/btl/mx/btl_mx_proc.h index f5aa8f4d9f..037af79239 100644 --- a/ompi/mca/btl/mx/btl_mx_proc.h +++ b/ompi/mca/btl/mx/btl_mx_proc.h @@ -19,7 +19,6 @@ #ifndef MCA_BTL_MX_PROC_H #define MCA_BTL_MX_PROC_H -#include "orte/mca/ns/ns.h" 
#include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_mx.h" diff --git a/ompi/mca/btl/ofud/btl_ofud_component.c b/ompi/mca/btl/ofud/btl_ofud_component.c index bdc0297a8d..7ce5c9dd48 100644 --- a/ompi/mca/btl/ofud/btl_ofud_component.c +++ b/ompi/mca/btl/ofud/btl_ofud_component.c @@ -34,6 +34,8 @@ #include "ompi/mca/mpool/rdma/mpool_rdma.h" #include "ompi/runtime/ompi_module_exchange.h" +#include "orte/runtime/orte_globals.h" + #include "btl_ofud.h" #include "btl_ofud_frag.h" #include "btl_ofud_endpoint.h" @@ -280,7 +282,7 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules, *num_btl_modules = 0; num_devs = 0; - seedv[0] = orte_process_info.my_name->vpid; + seedv[0] = ORTE_PROC_MY_NAME->vpid; seedv[1] = opal_sys_timer_get_cycles(); seedv[2] = opal_sys_timer_get_cycles(); seed48(seedv); diff --git a/ompi/mca/btl/ofud/btl_ofud_proc.h b/ompi/mca/btl/ofud/btl_ofud_proc.h index c9994f8afa..ea4b5f448d 100644 --- a/ompi/mca/btl/ofud/btl_ofud_proc.h +++ b/ompi/mca/btl/ofud/btl_ofud_proc.h @@ -22,7 +22,6 @@ #define MCA_BTL_UD_PROC_H #include "opal/class/opal_object.h" -#include "orte/mca/ns/ns.h" #include "ompi/proc/proc.h" #include "btl_ofud.h" diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c index 29b3e27e28..2ba9346999 100644 --- a/ompi/mca/btl/openib/btl_openib.c +++ b/ompi/mca/btl/openib/btl_openib.c @@ -57,6 +57,7 @@ #ifdef HAVE_SYS_RESOURCE_H #include #endif +#include mca_btl_openib_module_t mca_btl_openib_module = { { diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index 6b4440a7e2..a979eae7bc 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -46,6 +46,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/sys_info.h" +#include "orte/runtime/orte_globals.h" #include "ompi/proc/proc.h" #include "ompi/mca/pml/pml.h" @@ -1315,7 +1316,7 @@ btl_openib_component_init(int 
*num_btl_modules, *num_btl_modules = 0; num_devs = 0; - seedv[0] = orte_process_info.my_name->vpid; + seedv[0] = ORTE_PROC_MY_NAME->vpid; seedv[1] = opal_sys_timer_get_cycles(); seedv[2] = opal_sys_timer_get_cycles(); seed48(seedv); diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c index 164176a36d..ae639421cb 100644 --- a/ompi/mca/btl/openib/btl_openib_endpoint.c +++ b/ompi/mca/btl/openib/btl_openib_endpoint.c @@ -30,11 +30,9 @@ #include #include -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" #include "ompi/types.h" #include "ompi/mca/pml/base/pml_base_sendreq.h" diff --git a/ompi/mca/btl/openib/btl_openib_proc.h b/ompi/mca/btl/openib/btl_openib_proc.h index d0f35f9c01..f23239fe55 100644 --- a/ompi/mca/btl/openib/btl_openib_proc.h +++ b/ompi/mca/btl/openib/btl_openib_proc.h @@ -20,7 +20,6 @@ #ifndef MCA_BTL_IB_PROC_H #define MCA_BTL_IB_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_openib.h" diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c b/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c index d8175a6e37..3009c90b54 100644 --- a/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c +++ b/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c @@ -22,11 +22,14 @@ #include "ompi_config.h" -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + +#include "ompi/mca/dpm/dpm.h" #include "btl_openib.h" #include "btl_openib_endpoint.h" @@ -59,10 +62,10 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, uint8_t message_type); static void rml_send_cb(int status, orte_process_name_t* endpoint, - 
orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); static void rml_recv_cb(int status, orte_process_name_t* process_name, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); /* @@ -107,7 +110,7 @@ static int oob_init(void) int rc; rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_OPENIB, + OMPI_RML_TAG_OPENIB, ORTE_RML_PERSISTENT, rml_recv_cb, NULL); @@ -158,7 +161,7 @@ static int oob_query(mca_btl_openib_hca_t *hca) */ static int oob_finalize(void) { - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_OPENIB); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB); return OMPI_SUCCESS; } @@ -416,7 +419,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp, static int send_connect_data(mca_btl_base_endpoint_t* endpoint, uint8_t message_type) { - orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t); + opal_buffer_t* buffer = OBJ_NEW(opal_buffer_t); int rc; if (NULL == buffer) { @@ -425,15 +428,15 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, } /* pack the info in the send buffer */ - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT8)); - rc = orte_dss.pack(buffer, &message_type, 1, ORTE_UINT8); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8)); + rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT64)); - rc = orte_dss.pack(buffer, &endpoint->subnet_id, 1, ORTE_UINT64); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64)); + rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -441,16 +444,16 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, if (message_type != ENDPOINT_CONNECT_REQUEST) { /* send the QP connect request info we respond to */ - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = 
orte_dss.pack(buffer, + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->rem_info.rem_qps[0].rem_qp_num, 1, - ORTE_UINT32); + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16)); - rc = orte_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, ORTE_UINT16); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16)); + rc = opal_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -461,37 +464,37 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, int qp; /* stuff all the QP info into the buffer */ for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num, - 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num, + 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1, - ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, ORTE_UINT16); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16)); + rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1, - ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, 
&endpoint->endpoint_btl->hca->mtu, 1, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->index, 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -500,7 +503,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, /* send to remote endpoint */ rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, - buffer, ORTE_RML_TAG_OPENIB, 0, + buffer, OMPI_RML_TAG_OPENIB, 0, rml_send_cb, NULL); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -519,7 +522,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, * remote peer */ static void rml_send_cb(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { OBJ_RELEASE(buffer); @@ -532,7 +535,7 @@ static void rml_send_cb(int status, orte_process_name_t* endpoint, * otherwise try to modify QP's and establish reliable connection */ static void rml_recv_cb(int status, orte_process_name_t* process_name, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { mca_btl_openib_proc_t *ib_proc; @@ -548,29 +551,29 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name, /* start by unpacking data first so we know who is knocking at our door */ - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT8)); - rc = orte_dss.unpack(buffer, &message_type, &cnt, ORTE_UINT8); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8)); + rc = opal_dss.unpack(buffer, &message_type, &cnt, OPAL_UINT8); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT64)); - rc = orte_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, ORTE_UINT64); + 
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64)); + rc = opal_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, OPAL_UINT64); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } if (ENDPOINT_CONNECT_REQUEST != message_type) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &lcl_qp, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &lcl_qp, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16)); - rc = orte_dss.unpack(buffer, &lcl_lid, &cnt, ORTE_UINT16); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16)); + rc = opal_dss.unpack(buffer, &lcl_lid, &cnt, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; @@ -585,36 +588,36 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name, /* unpack all the qp info */ for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt, - ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_psn, &cnt, - ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_psn, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16)); - rc = orte_dss.unpack(buffer, &rem_info.rem_lid, &cnt, ORTE_UINT16); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16)); + rc = opal_dss.unpack(buffer, &rem_info.rem_lid, &cnt, OPAL_UINT16); if (ORTE_SUCCESS != rc) { 
ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &rem_info.rem_index, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &rem_info.rem_index, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; @@ -625,7 +628,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name, rem_info.rem_lid, rem_info.rem_subnet_id)); - master = orte_ns.compare_fields(ORTE_NS_CMP_ALL, orte_process_info.my_name, + master = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, process_name) > 0 ? true : false; for (ib_proc = (mca_btl_openib_proc_t*) @@ -635,8 +638,8 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name, ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) { bool found = false; - if (orte_ns.compare_fields(ORTE_NS_CMP_ALL, - &ib_proc->proc_guid, process_name) != ORTE_EQUAL) { + if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &ib_proc->proc_guid, process_name) != OPAL_EQUAL) { continue; } diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c index b52479722a..2345339414 100644 --- a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c +++ b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c @@ -10,11 +10,11 @@ #include "ompi_config.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/oob/base/base.h" +#include "orte/util/name_fns.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" +#include "ompi/mca/dpm/dpm.h" 
#include "btl_openib.h" #include "btl_openib_endpoint.h" @@ -55,8 +55,6 @@ typedef enum { ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE /* The xrc recv qp already was destroyed */ } connect_message_type_t; -#define XOOB_TAG (ORTE_RML_TAG_DYNAMIC - 1) - #define XOOB_SET_REMOTE_INFO(EP, INFO) \ do { \ /* copy the rem_info stuff */ \ @@ -79,7 +77,7 @@ static int xoob_priority = 60; * remote peer */ static void xoob_rml_send_cb(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { OBJ_RELEASE(buffer); @@ -87,29 +85,29 @@ static void xoob_rml_send_cb(int status, orte_process_name_t* endpoint, /* Receive connect information to remote endpoint */ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *lid, - uint8_t *message_type, orte_buffer_t* buffer) + uint8_t *message_type, opal_buffer_t* buffer) { int cnt = 1, rc, srq; /* Recv standart header */ - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT8)); - rc = orte_dss.unpack(buffer, message_type, &cnt, ORTE_UINT8); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8)); + rc = opal_dss.unpack(buffer, message_type, &cnt, OPAL_UINT8); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; } BTL_VERBOSE(("Recv unpack Message type = %d", *message_type)); - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT64)); - rc = orte_dss.unpack(buffer, &info->rem_subnet_id, &cnt, ORTE_UINT64); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64)); + rc = opal_dss.unpack(buffer, &info->rem_subnet_id, &cnt, OPAL_UINT64); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; } BTL_VERBOSE(("Recv unpack sid = %d", info->rem_subnet_id)); - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16)); - rc = orte_dss.unpack(buffer, &info->rem_lid, &cnt, ORTE_UINT16); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16)); + rc = opal_dss.unpack(buffer, &info->rem_lid, &cnt, OPAL_UINT16); if 
(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -121,26 +119,26 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * */ if (ENDPOINT_XOOB_CONNECT_REQUEST == *message_type || ENDPOINT_XOOB_CONNECT_RESPONSE == *message_type) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt, - ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; } BTL_VERBOSE(("Recv unpack remote qp = %x", info->rem_qps->rem_qp_num)); - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_qps->rem_psn, &cnt, - ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_qps->rem_psn, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; } BTL_VERBOSE(("Recv unpack remote psn = %d", info->rem_qps->rem_psn)); - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_mtu, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_mtu, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -151,8 +149,8 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * if (ENDPOINT_XOOB_CONNECT_REQUEST == *message_type || ENDPOINT_XOOB_CONNECT_XRC_REQUEST == *message_type) { /* unpack requested lid info */ - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16)); - rc = orte_dss.unpack(buffer, lid, &cnt, ORTE_UINT16); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16)); + rc = opal_dss.unpack(buffer, lid, &cnt, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -162,10 +160,10 @@ static int 
xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * /* Unpack requested recv qp number */ if (ENDPOINT_XOOB_CONNECT_XRC_REQUEST == *message_type) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); /* In XRC request case we will use rem_qp_num as container for requested qp number */ - rc = orte_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt, - ORTE_UINT32); + rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -175,8 +173,8 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * if (ENDPOINT_XOOB_CONNECT_RESPONSE == *message_type || ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == *message_type) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_index, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_index, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -184,8 +182,8 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * BTL_VERBOSE(("Recv unpack remote index = %d", info->rem_index)); for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_srqs[srq].rem_srq_num, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_srqs[srq].rem_srq_num, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -202,7 +200,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, uint8_t message_type) { - orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t); + opal_buffer_t* buffer = 
OBJ_NEW(opal_buffer_t); int rc, srq; if (NULL == buffer) { @@ -217,24 +215,24 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, */ /* pack the info in the send buffer */ BTL_VERBOSE(("Send pack Message type = %d", message_type)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT8)); - rc = orte_dss.pack(buffer, &message_type, 1, ORTE_UINT8); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8)); + rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } BTL_VERBOSE(("Send pack sid = %d", endpoint->subnet_id)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT64)); - rc = orte_dss.pack(buffer, &endpoint->subnet_id, 1, ORTE_UINT64); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64)); + rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } BTL_VERBOSE(("Send pack lid = %d", endpoint->endpoint_btl->lid)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, ORTE_UINT16); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16)); + rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -260,24 +258,24 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, /* stuff all the QP info into the buffer */ /* we need to send only one QP */ BTL_VERBOSE(("Send pack qp num = %x", qp_num)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &qp_num, 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &qp_num, 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } BTL_VERBOSE(("Send pack lpsn = %d", psn)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &psn, 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = 
opal_dss.pack(buffer, &psn, 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } BTL_VERBOSE(("Send pack mtu = %d", endpoint->endpoint_btl->hca->mtu)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1, - ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -293,8 +291,8 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, /* when we are sending request we add remote lid that we want to connect */ BTL_VERBOSE(("Send pack remote lid = %d", endpoint->ib_addr->lid)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16)); - rc = orte_dss.pack(buffer, &endpoint->ib_addr->lid, 1, ORTE_UINT16); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16)); + rc = opal_dss.pack(buffer, &endpoint->ib_addr->lid, 1, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -305,9 +303,9 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, * recv qp number that we want to connect. 
*/ if (ENDPOINT_XOOB_CONNECT_XRC_REQUEST == message_type) { BTL_VERBOSE(("Send pack remote qp = %x", endpoint->ib_addr->remote_xrc_rcv_qp_num)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->ib_addr->remote_xrc_rcv_qp_num, - 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->ib_addr->remote_xrc_rcv_qp_num, + 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -322,8 +320,8 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == message_type) { /* we need to send the endpoint index for immidate send */ BTL_VERBOSE(("Send pack index = %d", endpoint->index)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->index, 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -331,9 +329,9 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, /* on response we add all SRQ numbers */ for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) { BTL_VERBOSE(("Send pack srq[%d] num = %d", srq, endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num, - 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num, + 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -343,7 +341,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, /* send to remote endpoint */ rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, - buffer, XOOB_TAG, 0, + buffer, OMPI_RML_TAG_XOPENIB, 0, xoob_rml_send_cb, 
NULL); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -642,8 +640,8 @@ static mca_btl_openib_endpoint_t* xoob_find_endpoint(orte_process_name_t* proces ib_proc != (mca_btl_openib_proc_t*) opal_list_get_end(&mca_btl_openib_component.ib_procs); ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) { - if (orte_ns.compare_fields(ORTE_NS_CMP_ALL, - &ib_proc->proc_guid, process_name) == ORTE_EQUAL) { + if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &ib_proc->proc_guid, process_name) == OPAL_EQUAL) { found = true; break; } @@ -753,7 +751,7 @@ static void free_rem_info(mca_btl_openib_rem_info_t *rem_info) * otherwise try to modify QP's and establish reliable connection */ static void xoob_rml_recv_cb(int status, orte_process_name_t* process_name, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { int rc; @@ -947,7 +945,7 @@ static int xoob_init(void) int rc; rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - XOOB_TAG, + OMPI_RML_TAG_XOPENIB, ORTE_RML_PERSISTENT, xoob_rml_recv_cb, NULL); @@ -1018,6 +1016,6 @@ static int xoob_start_connect(mca_btl_base_endpoint_t *endpoint) */ static int xoob_finalize(void) { - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, XOOB_TAG); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_XOPENIB); return OMPI_SUCCESS; } diff --git a/ompi/mca/btl/sctp/btl_sctp_component.c b/ompi/mca/btl/sctp/btl_sctp_component.c index dd61d236ff..150b5ef14e 100644 --- a/ompi/mca/btl/sctp/btl_sctp_component.c +++ b/ompi/mca/btl/sctp/btl_sctp_component.c @@ -48,7 +48,6 @@ #include "opal/util/argv.h" #include "opal/util/output.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/ns/ns_types.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" diff --git a/ompi/mca/btl/sctp/btl_sctp_endpoint.c b/ompi/mca/btl/sctp/btl_sctp_endpoint.c index 60a441e683..5a9bf09a97 100644 --- a/ompi/mca/btl/sctp/btl_sctp_endpoint.c +++ b/ompi/mca/btl/sctp/btl_sctp_endpoint.c @@ -55,6 +55,7 @@ 
#include "ompi/types.h" #include "ompi/mca/btl/base/btl_base_error.h" +#include "orte/util/name_fns.h" #include "btl_sctp.h" #include "btl_sctp_endpoint.h" #include "btl_sctp_proc.h" @@ -567,7 +568,6 @@ bool mca_btl_sctp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct /* 1 to 1 */ mca_btl_sctp_addr_t* btl_addr; mca_btl_sctp_proc_t* this_proc = mca_btl_sctp_proc_local(); - orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL; int cmpval; OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock); @@ -576,7 +576,7 @@ bool mca_btl_sctp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct btl_addr->addr_inet.s_addr == addr->sin_addr.s_addr) { mca_btl_sctp_proc_t *endpoint_proc = btl_endpoint->endpoint_proc; - cmpval = orte_ns.compare_fields(mask, + cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &endpoint_proc->proc_ompi->proc_name, &this_proc->proc_ompi->proc_name); if((btl_endpoint->endpoint_sd < 0) || diff --git a/ompi/mca/btl/sctp/btl_sctp_proc.h b/ompi/mca/btl/sctp/btl_sctp_proc.h index ab5560810c..79d4dc0fa6 100644 --- a/ompi/mca/btl/sctp/btl_sctp_proc.h +++ b/ompi/mca/btl/sctp/btl_sctp_proc.h @@ -20,7 +20,6 @@ #define MCA_BTL_SCTP_PROC_H #include "opal/class/opal_object.h" -#include "orte/mca/ns/ns.h" #include "ompi/proc/proc.h" #include "btl_sctp.h" #include "btl_sctp_addr.h" diff --git a/ompi/mca/btl/sctp/btl_sctp_recv_handler.c b/ompi/mca/btl/sctp/btl_sctp_recv_handler.c index 372b84848f..5007e00c1a 100644 --- a/ompi/mca/btl/sctp/btl_sctp_recv_handler.c +++ b/ompi/mca/btl/sctp/btl_sctp_recv_handler.c @@ -43,7 +43,6 @@ #include "opal/util/argv.h" #include "opal/util/output.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/ns/ns_types.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" diff --git a/ompi/mca/btl/sm/btl_sm_component.c b/ompi/mca/btl/sm/btl_sm_component.c index f0361afb78..5c38a1c27a 100644 --- a/ompi/mca/btl/sm/btl_sm_component.c +++ b/ompi/mca/btl/sm/btl_sm_component.c @@ -45,6 +45,8 @@ #include 
"opal/util/output.h" #include "orte/util/sys_info.h" #include "orte/util/proc_info.h" +#include "orte/runtime/orte_globals.h" + #include "ompi/mca/pml/pml.h" #include "opal/mca/base/mca_base_param.h" #include "ompi/runtime/ompi_module_exchange.h" @@ -263,7 +265,7 @@ mca_btl_base_module_t** mca_btl_sm_component_init( /* create a named pipe to receive events */ sprintf( mca_btl_sm_component.sm_fifo_path, "%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir, - (unsigned long)orte_process_info.my_name->vpid ); + (unsigned long)ORTE_PROC_MY_NAME->vpid ); if(mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) { opal_output(0, "mca_btl_sm_component_init: mkfifo failed with errno=%d\n",errno); return NULL; diff --git a/ompi/mca/btl/tcp/btl_tcp_component.c b/ompi/mca/btl/tcp/btl_tcp_component.c index d00ebc6b89..d477be7014 100644 --- a/ompi/mca/btl/tcp/btl_tcp_component.c +++ b/ompi/mca/btl/tcp/btl_tcp_component.c @@ -50,7 +50,7 @@ #include "opal/util/argv.h" #include "opal/util/output.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" diff --git a/ompi/mca/btl/tcp/btl_tcp_endpoint.c b/ompi/mca/btl/tcp/btl_tcp_endpoint.c index 4f18ac5a21..684da05bc1 100644 --- a/ompi/mca/btl/tcp/btl_tcp_endpoint.c +++ b/ompi/mca/btl/tcp/btl_tcp_endpoint.c @@ -344,7 +344,7 @@ bool mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, return false; } - cmpval = orte_ns.compare_fields(ORTE_NS_CMP_ALL, + cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &endpoint_proc->proc_ompi->proc_name, &this_proc->proc_ompi->proc_name); if((btl_endpoint->endpoint_sd < 0) || @@ -475,7 +475,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en } ORTE_PROCESS_NAME_NTOH(guid); /* compare this to the expected values */ - if (0 != orte_ns.compare_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) { + if (OPAL_EQUAL != 
orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) { BTL_ERROR(("received unexpected process identifier %s", ORTE_NAME_PRINT(&guid))); mca_btl_tcp_endpoint_close(btl_endpoint); diff --git a/ompi/mca/btl/tcp/btl_tcp_proc.h b/ompi/mca/btl/tcp/btl_tcp_proc.h index 711ba8d750..47f6afbbd1 100644 --- a/ompi/mca/btl/tcp/btl_tcp_proc.h +++ b/ompi/mca/btl/tcp/btl_tcp_proc.h @@ -20,8 +20,8 @@ #define MCA_BTL_TCP_PROC_H #include "opal/class/opal_object.h" -#include "orte/mca/ns/ns.h" #include "ompi/proc/proc.h" +#include "orte/types.h" #include "btl_tcp.h" #include "btl_tcp_addr.h" #include "btl_tcp_endpoint.h" diff --git a/ompi/mca/btl/template/btl_template_endpoint.c b/ompi/mca/btl/template/btl_template_endpoint.c index 8d9ef33ccf..5e8561d4c9 100644 --- a/ompi/mca/btl/template/btl_template_endpoint.c +++ b/ompi/mca/btl/template/btl_template_endpoint.c @@ -21,11 +21,10 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "btl_template.h" #include "btl_template_endpoint.h" #include "btl_template_proc.h" diff --git a/ompi/mca/btl/template/btl_template_proc.h b/ompi/mca/btl/template/btl_template_proc.h index 3577b7208b..85d162a969 100644 --- a/ompi/mca/btl/template/btl_template_proc.h +++ b/ompi/mca/btl/template/btl_template_proc.h @@ -19,7 +19,6 @@ #ifndef MCA_BTL_TEMPLATE_PROC_H #define MCA_BTL_TEMPLATE_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_template.h" diff --git a/ompi/mca/btl/udapl/btl_udapl.h b/ompi/mca/btl/udapl/btl_udapl.h index 399e24f23f..0e7aa69bfb 100644 --- a/ompi/mca/btl/udapl/btl_udapl.h +++ b/ompi/mca/btl/udapl/btl_udapl.h @@ -226,7 +226,7 @@ do { \ if (verbose_level <= mca_btl_udapl_component.udapl_verbosity) { \ mca_btl_base_out("[%s]%s[%s:%d:%s] ", \ 
orte_system_info.nodename, \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, __LINE__, __func__); \ mca_btl_base_out args; \ mca_btl_base_out("\n"); \ diff --git a/ompi/mca/btl/udapl/btl_udapl_endpoint.c b/ompi/mca/btl/udapl/btl_udapl_endpoint.c index fd432b54a1..6c89e20893 100644 --- a/ompi/mca/btl/udapl/btl_udapl_endpoint.c +++ b/ompi/mca/btl/udapl/btl_udapl_endpoint.c @@ -28,14 +28,17 @@ #include "ompi/types.h" #include "opal/include/opal/align.h" #include "opal/util/show_help.h" -#include "orte/mca/ns/base/base.h" + #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "opal/class/opal_pointer_array.h" + #include "ompi/class/ompi_free_list.h" #include "ompi/mca/mpool/rdma/mpool_rdma.h" +#include "ompi/mca/dpm/dpm.h" + #include "ompi/mca/btl/base/btl_base_error.h" #include "btl_udapl.h" #include "btl_udapl_endpoint.h" @@ -44,14 +47,14 @@ #include "btl_udapl_proc.h" static void mca_btl_udapl_endpoint_send_cb(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint); static int mca_btl_udapl_endpoint_post_recv(mca_btl_udapl_endpoint_t* endpoint, size_t size); void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint); void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); static int mca_btl_udapl_endpoint_finish_eager(mca_btl_udapl_endpoint_t*); static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t*); @@ -272,7 +275,7 @@ int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint, static void mca_btl_udapl_endpoint_send_cb(int status, orte_process_name_t* endpoint, - orte_buffer_t* 
buffer, orte_rml_tag_t tag, void* cbdata) + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { OBJ_RELEASE(buffer); } @@ -467,7 +470,7 @@ int mca_btl_udapl_endpoint_create(mca_btl_udapl_module_t* btl, static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) { mca_btl_udapl_addr_t* addr = &endpoint->endpoint_btl->udapl_addr; - orte_buffer_t* buf = OBJ_NEW(orte_buffer_t); + opal_buffer_t* buf = OBJ_NEW(opal_buffer_t); int rc; if(NULL == buf) { @@ -478,13 +481,13 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), 1); /* Pack our address information */ - rc = orte_dss.pack(buf, &addr->port, 1, ORTE_UINT64); + rc = opal_dss.pack(buf, &addr->port, 1, OPAL_UINT64); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_dss.pack(buf, &addr->addr, sizeof(DAT_SOCK_ADDR), ORTE_UINT8); + rc = opal_dss.pack(buf, &addr->addr, sizeof(DAT_SOCK_ADDR), OPAL_UINT8); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -492,7 +495,7 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) /* Send the buffer */ rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, buf, - ORTE_RML_TAG_UDAPL, 0, mca_btl_udapl_endpoint_send_cb, NULL); + OMPI_RML_TAG_UDAPL, 0, mca_btl_udapl_endpoint_send_cb, NULL); if(0 > rc) { ORTE_ERROR_LOG(rc); return rc; @@ -504,7 +507,7 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { mca_btl_udapl_addr_t addr; mca_btl_udapl_proc_t* proc; @@ -514,14 +517,14 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, int rc; /* Unpack data */ - rc = orte_dss.unpack(buffer, &addr.port, &cnt, ORTE_UINT64); + rc = opal_dss.unpack(buffer, &addr.port, &cnt, 
OPAL_UINT64); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } cnt = sizeof(mca_btl_udapl_addr_t); - rc = orte_dss.unpack(buffer, &addr.addr, &cnt, ORTE_UINT8); + rc = opal_dss.unpack(buffer, &addr.addr, &cnt, OPAL_UINT8); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; @@ -535,7 +538,7 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, opal_list_get_end(&mca_btl_udapl_component.udapl_procs); proc = (mca_btl_udapl_proc_t*)opal_list_get_next(proc)) { - if(ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &proc->proc_guid, endpoint)) { + if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &proc->proc_guid, endpoint)) { for(i = 0; i < proc->proc_endpoint_count; i++) { ep = proc->proc_endpoints[i]; @@ -561,7 +564,7 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, void mca_btl_udapl_endpoint_post_oob_recv(void) { - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_UDAPL, + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, OMPI_RML_TAG_UDAPL, ORTE_RML_PERSISTENT, mca_btl_udapl_endpoint_recv, NULL); } @@ -577,7 +580,7 @@ void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint) /* Nasty test to prevent deadlock and unwanted connection attempts */ /* This right here is the whole point of using the ORTE/RML handshake */ if((MCA_BTL_UDAPL_CONN_EAGER == endpoint->endpoint_state && - 0 > orte_ns.compare_fields(ORTE_NS_CMP_ALL, + 0 > orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &endpoint->endpoint_proc->proc_guid, &ompi_proc_local()->proc_name)) || (MCA_BTL_UDAPL_CLOSED != endpoint->endpoint_state && @@ -715,7 +718,7 @@ static int mca_btl_udapl_endpoint_finish_eager( } /* Only one side does dat_ep_connect() */ - if(0 < orte_ns.compare_fields(ORTE_NS_CMP_ALL, + if(0 < orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &endpoint->endpoint_proc->proc_guid, &ompi_proc_local()->proc_name)) { diff --git a/ompi/mca/btl/udapl/btl_udapl_proc.h b/ompi/mca/btl/udapl/btl_udapl_proc.h index 
3a9b882dd9..1dd97f7635 100644 --- a/ompi/mca/btl/udapl/btl_udapl_proc.h +++ b/ompi/mca/btl/udapl/btl_udapl_proc.h @@ -20,7 +20,6 @@ #ifndef MCA_BTL_UDAPL_PROC_H #define MCA_BTL_UDAPL_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_udapl.h" diff --git a/ompi/mca/coll/sm/coll_sm.h b/ompi/mca/coll/sm/coll_sm.h index 2eabb91d21..640fcf3a4f 100644 --- a/ompi/mca/coll/sm/coll_sm.h +++ b/ompi/mca/coll/sm/coll_sm.h @@ -24,7 +24,7 @@ #include "mpi.h" #include "opal/mca/mca.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/mpool/mpool.h" #include "ompi/mca/common/sm/common_sm_mmap.h" diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index 9d1c1949cd..938e889245 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -43,7 +43,10 @@ #include "opal/mca/maffinity/maffinity.h" #include "opal/mca/maffinity/base/base.h" #include "opal/util/os_path.h" -#include "orte/mca/ns/ns.h" + +#include "orte/util/proc_info.h" +#include "orte/util/name_fns.h" + #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/base.h" @@ -598,7 +601,7 @@ static int bootstrap_comm(ompi_communicator_t *comm, empty_index = -1; for (i = 0; i < mca_coll_sm_component.sm_bootstrap_num_segments; ++i) { if (comm->c_contextid == bshe->smbhe_keys[i].mcsbck_cid && - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, rank0, &bshe->smbhe_keys[i].mcsbck_rank0_name)) { found = true; diff --git a/ompi/mca/common/portals/common_portals.c b/ompi/mca/common/portals/common_portals.c index 421ba6510d..76f1223e0c 100644 --- a/ompi/mca/common/portals/common_portals.c +++ b/ompi/mca/common/portals/common_portals.c @@ -17,7 +17,7 @@ */ #include "ompi_config.h" - +#include "ompi/constants.h" #include "common_portals.h" diff 
--git a/ompi/mca/common/sm/common_sm_mmap.c b/ompi/mca/common/sm/common_sm_mmap.c index 5fbf011152..b67084f5cd 100644 --- a/ompi/mca/common/sm/common_sm_mmap.c +++ b/ompi/mca/common/sm/common_sm_mmap.c @@ -51,6 +51,8 @@ #include "orte/mca/rml/rml_types.h" #include "orte/mca/rml/base/base.h" +#include "ompi/mca/dpm/dpm.h" + OBJ_CLASS_INSTANCE( mca_common_sm_mmap_t, opal_object_t, @@ -179,13 +181,13 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name, /* signal the rest of the local procs that the backing file has been created */ for(p=1 ; p < n_local_procs ; p++ ) { - sm_file_created=ORTE_RML_TAG_SM_BACK_FILE_CREATED; + sm_file_created=OMPI_RML_TAG_SM_BACK_FILE_CREATED; iov[0].iov_base=&sm_file_created; iov[0].iov_len=sizeof(sm_file_created); iov[1].iov_base=&sm_file_inited; iov[1].iov_len=sizeof(sm_file_inited); rc=orte_rml.send(&(procs[p]->proc_name),iov,2, - ORTE_RML_TAG_SM_BACK_FILE_CREATED,0); + OMPI_RML_TAG_SM_BACK_FILE_CREATED,0); if( rc < 0 ) { opal_output(0, "mca_common_sm_mmap_init: orte_rml.send failed to %lu with errno=%d\n", @@ -205,7 +207,7 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name, iov[1].iov_base=&sm_file_inited; iov[1].iov_len=sizeof(sm_file_inited); rc=orte_rml.recv(&(procs[0]->proc_name),iov,2, - ORTE_RML_TAG_SM_BACK_FILE_CREATED,0); + OMPI_RML_TAG_SM_BACK_FILE_CREATED,0); if( rc < 0 ) { opal_output(0, "mca_common_sm_mmap_init: orte_rml.recv failed from %ld with errno=%d\n", 0L, errno); diff --git a/ompi/mca/crcp/base/base.h b/ompi/mca/crcp/base/base.h index 942a2e3315..b8de8892a0 100644 --- a/ompi/mca/crcp/base/base.h +++ b/ompi/mca/crcp/base/base.h @@ -22,7 +22,7 @@ #include "ompi/constants.h" #include "orte/mca/rml/rml.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "ompi/mca/crcp/crcp.h" diff --git a/ompi/mca/crcp/base/crcp_base_fns.c b/ompi/mca/crcp/base/crcp_base_fns.c index 5ac7aad1c6..abc1f914aa 100644 --- a/ompi/mca/crcp/base/crcp_base_fns.c +++ 
b/ompi/mca/crcp/base/crcp_base_fns.c @@ -31,9 +31,6 @@ #include "opal/util/output.h" #include "opal/util/os_dirpath.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/gpr/gpr.h" - #include "ompi/communicator/communicator.h" #include "ompi/proc/proc.h" #include "opal/mca/base/mca_base_param.h" diff --git a/ompi/mca/crcp/coord/crcp_coord.h b/ompi/mca/crcp/coord/crcp_coord.h index bdaa174380..3027dbac02 100644 --- a/ompi/mca/crcp/coord/crcp_coord.h +++ b/ompi/mca/crcp/coord/crcp_coord.h @@ -29,7 +29,6 @@ #include "opal/mca/mca.h" #include "ompi/mca/crcp/crcp.h" #include "ompi/communicator/communicator.h" -#include "orte/mca/ns/ns.h" #include "opal/runtime/opal_cr.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" diff --git a/ompi/mca/crcp/coord/crcp_coord_btl.h b/ompi/mca/crcp/coord/crcp_coord_btl.h index 03ed8b3b21..51d05fe00b 100644 --- a/ompi/mca/crcp/coord/crcp_coord_btl.h +++ b/ompi/mca/crcp/coord/crcp_coord_btl.h @@ -29,7 +29,6 @@ #include "opal/mca/mca.h" #include "ompi/mca/crcp/crcp.h" #include "ompi/communicator/communicator.h" -#include "orte/mca/ns/ns.h" #include "opal/runtime/opal_cr.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" diff --git a/ompi/mca/crcp/coord/crcp_coord_pml.c b/ompi/mca/crcp/coord/crcp_coord_pml.c index d6c1b282ec..8251501afd 100644 --- a/ompi/mca/crcp/coord/crcp_coord_pml.c +++ b/ompi/mca/crcp/coord/crcp_coord_pml.c @@ -171,8 +171,11 @@ #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" #include "ompi/request/request.h" #include "ompi/datatype/dt_arch.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" #include "ompi/mca/pml/base/pml_base_request.h" @@ -279,7 +282,7 @@ static int recv_bookmarks(int peer_idx); */ static void recv_bookmarks_cbfunc(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, 
orte_rml_tag_t tag, void* cbdata); static int total_recv_bookmarks = 0; @@ -367,7 +370,7 @@ static int ft_event_post_drain_acks(void); */ static void drain_message_ack_cbfunc(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata); @@ -766,7 +769,7 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_state_t, #define PACK_BUFFER(buffer, var, count, type, error_msg) \ { \ - if (OMPI_SUCCESS != (ret = orte_dss.pack(buffer, &(var), count, type)) ) { \ + if (OMPI_SUCCESS != (ret = opal_dss.pack(buffer, &(var), count, type)) ) { \ opal_output(mca_crcp_coord_component.super.output_handle, \ "%s (Return %d)", error_msg, ret); \ exit_status = ret; \ @@ -777,7 +780,7 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_state_t, #define UNPACK_BUFFER(buffer, var, count, type, error_msg) \ { \ orte_std_cntr_t n = count; \ - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &(var), &n, type)) ) { \ + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(var), &n, type)) ) { \ opal_output(mca_crcp_coord_component.super.output_handle, \ "%s (Return %d)", error_msg, ret); \ exit_status = ret; \ @@ -2703,6 +2706,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_ft_event( ompi_crcp_base_pml_state_t* pml_state) { static int step_to_return_to = 0; + opal_list_item_t* item = NULL; int exit_status = OMPI_SUCCESS; int ret; @@ -2782,6 +2786,19 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_ft_event( goto DONE; } + /* + * Refresh the jobids + */ + for(item = opal_list_get_first(&ompi_crcp_coord_pml_peer_refs); + item != opal_list_get_end(&ompi_crcp_coord_pml_peer_refs); + item = opal_list_get_next(item) ) { + ompi_crcp_coord_pml_peer_ref_t *cur_peer_ref; + cur_peer_ref = (ompi_crcp_coord_pml_peer_ref_t*)item; + + /* JJH - Assuming only one global jobid at the moment */ + cur_peer_ref->proc_name.jobid = ORTE_PROC_MY_NAME->jobid; + } + /* * Finish the coord protocol */ @@ -2832,9 +2849,9 @@ static ompi_crcp_coord_pml_peer_ref_t * 
find_peer(orte_process_name_t proc) ompi_crcp_coord_pml_peer_ref_t *cur_peer_ref; cur_peer_ref = (ompi_crcp_coord_pml_peer_ref_t*)item; - if( 0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, - &(cur_peer_ref->proc_name), - &proc) ) { + if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &(cur_peer_ref->proc_name), + &proc) ) { return cur_peer_ref; } } @@ -2993,7 +3010,7 @@ static int ft_event_coordinate_peers(void) if( stall_for_completion ) { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: ft_event_coordinate_peers: %s **** STALLING ***", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); step_to_return_to = 1; exit_status = OMPI_SUCCESS; goto DONE; @@ -3019,7 +3036,7 @@ static int ft_event_coordinate_peers(void) OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle, "crcp:coord: ft_event_coordinate_peers: %s Coordination Finished...\n", - ORTE_NAME_PRINT(orte_process_info.my_name) )); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* * Now that all our peer channels are marked as drained @@ -3099,7 +3116,7 @@ static int ft_event_finalize_exchange(void) static int ft_event_exchange_bookmarks(void) { int peer_idx = 0; - int my_idx = orte_process_info.my_name->vpid; + int my_idx = ORTE_PROC_MY_NAME->vpid; int iter = 0; int num_peers = 0; @@ -3144,13 +3161,13 @@ static int ft_event_check_bookmarks(void) int p_n_from_p_m = 0; if( 10 <= mca_crcp_coord_component.super.verbose ) { - sleep(orte_process_info.my_name->vpid); + sleep(ORTE_PROC_MY_NAME->vpid); OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "Process %s Match Table", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "%s %5s | %7s | %7s | %7s | %7s |", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), "Vpid", "T_Send", "M_Recv", "M_Send", "T_Recv")); 
for(item = opal_list_get_first(&ompi_crcp_coord_pml_peer_refs); @@ -3176,7 +3193,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "%s %5d | %7d | %7d | %7d | %7d |", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), peer_ref->proc_name.vpid, t_send, m_recv, m_send, t_recv)); } @@ -3193,14 +3210,14 @@ static int ft_event_check_bookmarks(void) ompi_crcp_coord_pml_peer_ref_t *peer_ref; peer_ref = (ompi_crcp_coord_pml_peer_ref_t*)item; - if( 0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, - (orte_process_info.my_name), - &(peer_ref->proc_name)) ) { + if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + (ORTE_PROC_MY_NAME), + &(peer_ref->proc_name)) ) { continue; } /* Lowest Rank sends first */ - if( orte_process_info.my_name->vpid < peer_ref->proc_name.vpid ) { + if( ORTE_PROC_MY_NAME->vpid < peer_ref->proc_name.vpid ) { /******************** * Check P_n --> P_m * Has the peer received all the messages that I have put on the wire? @@ -3218,7 +3235,7 @@ static int ft_event_check_bookmarks(void) "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: Peer received more than was sent. :(\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3232,7 +3249,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d). Peer needs %4d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3269,7 +3286,7 @@ static int ft_event_check_bookmarks(void) "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). 
" " WARNING: I received more than the peer sent. :(\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3283,7 +3300,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: check_bookmarks: %s <-- %s " "Received Msgs (%4d) = Sent Msgs (%4d). I need %4d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3321,7 +3338,7 @@ static int ft_event_check_bookmarks(void) "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: I received more than the peer sent. :(\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3335,7 +3352,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: check_bookmarks: %s <-- %s " "Received Msgs (%4d) = Sent Msgs (%4d). I need %4d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3371,7 +3388,7 @@ static int ft_event_check_bookmarks(void) "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: Peer received more than was sent. :(\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3385,7 +3402,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d). 
Peer needs %4d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3433,7 +3450,7 @@ static int ft_event_post_drain_acks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drain_ack: %s Wait on %d Drain ACK Messages.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)req_size)); /* @@ -3453,7 +3470,7 @@ static int ft_event_post_drain_acks(void) NULL) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drain_acks: %s Failed to post a RML receive to the peer\n", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); return ret; } } @@ -3463,7 +3480,7 @@ static int ft_event_post_drain_acks(void) static void drain_message_ack_cbfunc(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata) { @@ -3474,7 +3491,7 @@ static void drain_message_ack_cbfunc(int status, /* * Unpack the buffer */ - UNPACK_BUFFER(buffer, ckpt_status, 1, ORTE_SIZE, ""); + UNPACK_BUFFER(buffer, ckpt_status, 1, OPAL_SIZE, ""); /* * Update the outstanding message queue @@ -3488,13 +3505,14 @@ static void drain_message_ack_cbfunc(int status, /* If this ACK has not completed yet */ if(!drain_msg_ack->complete) { /* If it is the correct peer */ - if(drain_msg_ack->peer.jobid == sender->jobid && - drain_msg_ack->peer.vpid == sender->vpid ) { + if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &(drain_msg_ack->peer), + sender) ) { /* We found it! 
*/ drain_msg_ack->complete = true; OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle, "crcp:coord: drain_message_ack_cbfunc: %s --> %s Received ACK of FLUSH from peer\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender) )); return; } @@ -3503,7 +3521,7 @@ static void drain_message_ack_cbfunc(int status, opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: drain_message_ack_cbfunc: %s --> %s ERROR: Uable to match ACK to peer\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender) ); cleanup: @@ -3523,7 +3541,7 @@ static int ft_event_post_drained(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drained: %s Draining %d Messages.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)req_size)); /* @@ -3544,7 +3562,7 @@ static int ft_event_post_drained(void) if( drain_msg->already_posted ) { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drained: %s Found a message that we don't need to post.\n", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); continue; } /* @@ -3553,7 +3571,7 @@ static int ft_event_post_drained(void) else { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drained: %s Posting a message to be drained from %d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), drain_msg->rank)); if( OMPI_SUCCESS != (ret = wrapped_pml_module->pml_irecv(drain_msg->buffer, (drain_msg->count * drain_msg->ddt_size), @@ -3564,7 +3582,7 @@ static int ft_event_post_drained(void) &(drain_msg->request) ) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drained: %s Failed to post the Draining PML iRecv\n", - ORTE_NAME_PRINT(orte_process_info.my_name) ); + 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); return ret; } } @@ -3584,7 +3602,7 @@ static int ft_event_wait_quiesce(void) if( OMPI_SUCCESS != (ret = wait_quiesce_drained() ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce: %s Failed to quiesce drained messages\n", - ORTE_NAME_PRINT(orte_process_info.my_name) ); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); exit_status = ret; goto cleanup; } @@ -3595,7 +3613,7 @@ static int ft_event_wait_quiesce(void) if( OMPI_SUCCESS != (ret = wait_quiesce_drain_ack() ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce: %s Failed to recv all drain ACKs\n", - ORTE_NAME_PRINT(orte_process_info.my_name) ); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); exit_status = ret; goto cleanup; } @@ -3628,7 +3646,7 @@ static int wait_quiesce_drained(void) OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce_drained: %s Waiting on %d messages to drain\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)req_size)); /* @@ -3683,13 +3701,13 @@ static int wait_quiesce_drained(void) if( drain_msg->already_posted && NULL == drain_msg->request) { OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce_drained: %s - %s Already posted this msg.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(drain_msg->proc_name)) )); } else { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce_drained: %s - %s Waiting on message. 
(index = %d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(drain_msg->proc_name)), (int)wait_any_count)); @@ -3704,8 +3722,9 @@ static int wait_quiesce_drained(void) /* Add proc to response queue if it is not already there */ found = false; for(i = 0; i < last_proc_idx; ++i) { - if(proc_names[i].jobid == drain_msg->proc_name.jobid && - proc_names[i].vpid == drain_msg->proc_name.vpid ) { + if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &(proc_names[i]), + &(drain_msg->proc_name) ) ) { found = true; break; } @@ -3713,7 +3732,7 @@ static int wait_quiesce_drained(void) if( !found ) { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce: %s - %s Add process to response list [idx %d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(drain_msg->proc_name)), (int)last_proc_idx)); @@ -3741,19 +3760,19 @@ static int wait_quiesce_drained(void) */ OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce: %s Send ACKs to all Peers\n", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); for(i = 0; i < last_proc_idx; ++i) { - orte_buffer_t *buffer = NULL; + opal_buffer_t *buffer = NULL; size_t response = 1; /* Send All Clear to Peer */ - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = OMPI_ERROR; goto cleanup; } - PACK_BUFFER(buffer, response, 1, ORTE_SIZE, ""); + PACK_BUFFER(buffer, response, 1, OPAL_SIZE, ""); if ( 0 > ( ret = orte_rml.send_buffer(&(proc_names[i]), buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { exit_status = ret; @@ -3821,7 +3840,7 @@ static int coord_request_wait_all( size_t count, OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: request_wait_all: %s Done with idx %d of %d\n", - 
ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)i, (int)count)); } @@ -3858,7 +3877,7 @@ static int wait_quiesce_drain_ack(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce_drain_ack: %s Waiting on %d Drain ACK messages\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_outstanding)); while(0 < num_outstanding) { @@ -3892,7 +3911,7 @@ static int send_bookmarks(int peer_idx) { ompi_crcp_coord_pml_peer_ref_t *peer_ref; orte_process_name_t peer_name; - orte_buffer_t *buffer = NULL; + opal_buffer_t *buffer = NULL; int exit_status = OMPI_SUCCESS; int ret; @@ -3900,7 +3919,7 @@ static int send_bookmarks(int peer_idx) /* * Find the peer structure for this peer */ - peer_name.jobid = orte_process_info.my_name->jobid; + peer_name.jobid = ORTE_PROC_MY_NAME->jobid; peer_name.vpid = peer_idx; if( NULL == (peer_ref = find_peer(peer_name))) { @@ -3913,7 +3932,7 @@ static int send_bookmarks(int peer_idx) OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: send_bookmarks: %s -> %s Sending bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer_name), peer_ref->total_send_msgs, peer_ref->total_isend_msgs, @@ -3925,23 +3944,23 @@ static int send_bookmarks(int peer_idx) /* * Send the bookmarks to peer */ - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = OMPI_ERROR; goto cleanup; } - PACK_BUFFER(buffer, (peer_ref->total_send_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_send_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_send_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_isend_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_isend_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack 
total_isend_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_send_init_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_send_init_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_send_init_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_recv_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_recv_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_recv_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_irecv_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_irecv_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_irecv_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_recv_init_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_recv_init_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_recv_init_msgs"); if ( 0 > ( ret = orte_rml.send_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { @@ -3975,7 +3994,7 @@ static int recv_bookmarks(int peer_idx) START_TIMER(CRCP_TIMER_CKPT_PEER_R); - peer_name.jobid = orte_process_info.my_name->jobid; + peer_name.jobid = ORTE_PROC_MY_NAME->jobid; peer_name.vpid = peer_idx; if ( 0 > (ret = orte_rml.recv_buffer_nb(&peer_name, @@ -4005,7 +4024,7 @@ static int recv_bookmarks(int peer_idx) { ompi_crcp_coord_pml_peer_ref_t *peer_ref; orte_process_name_t peer_name; - orte_buffer_t * buffer = NULL; + opal_buffer_t * buffer = NULL; int exit_status = OMPI_SUCCESS; int ret, tmp_int; @@ -4014,7 +4033,7 @@ static int recv_bookmarks(int peer_idx) /* * Find the peer structure for this peer */ - peer_name.jobid = orte_process_info.my_name->jobid; + peer_name.jobid = ORTE_PROC_MY_NAME->jobid; peer_name.vpid = peer_idx; if( NULL == (peer_ref = find_peer(peer_name))) { @@ -4028,7 +4047,7 @@ static int recv_bookmarks(int peer_idx) /* * Receive the bookmark from peer */ - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = ORTE_ERROR; goto cleanup; } @@ 
-4042,29 +4061,29 @@ static int recv_bookmarks(int peer_idx) goto cleanup; } - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_send_msgs"); peer_ref->matched_send_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_isend_msgs"); peer_ref->matched_isend_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_send_init_msgs"); peer_ref->matched_send_init_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_recv_msgs"); peer_ref->matched_recv_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_irecv_msgs"); peer_ref->matched_irecv_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_recv_init_msgs"); peer_ref->matched_recv_init_msgs = tmp_int; OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_bookmarks: %s <- %s Received bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer_name), peer_ref->matched_send_msgs, peer_ref->matched_isend_msgs, @@ -4087,7 +4106,7 @@ static int recv_bookmarks(int peer_idx) static void recv_bookmarks_cbfunc(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata) { @@ -4111,29 +4130,29 @@ static void recv_bookmarks_cbfunc(int status, goto cleanup; } - UNPACK_BUFFER(buffer, tmp_int, 1, 
ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_send_msgs"); peer_ref->matched_send_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_isend_msgs"); peer_ref->matched_isend_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_send_init_msgs"); peer_ref->matched_send_init_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_recv_msgs"); peer_ref->matched_recv_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_irecv_msgs"); peer_ref->matched_irecv_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_recv_init_msgs"); peer_ref->matched_recv_init_msgs = tmp_int; OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_bookmarks: %s <- %s Received bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender), peer_ref->matched_send_msgs, peer_ref->matched_isend_msgs, @@ -4189,7 +4208,7 @@ static int send_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if(OMPI_SUCCESS != (ret = do_send_msg_detail(peer_ref, msg_ref, &found_match, &finished)) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: send_msg_details: %s --> %s Failed to send message details to peer. 
Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); } @@ -4240,7 +4259,7 @@ static int send_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, opal_list_append(&drained_msg_ack_list, &(d_msg_ack->super)); OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: send_msg_details: %s <--> %s Will wait on ACK from this peer.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)))); /* @@ -4261,7 +4280,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, bool *found_match, bool *finished) { - orte_buffer_t *buffer = NULL; + opal_buffer_t *buffer = NULL; int32_t req_more = -1; int comm_my_rank = -1; int exit_status = OMPI_SUCCESS; @@ -4275,7 +4294,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, buffer = NULL; } - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = OMPI_ERROR; goto cleanup; } @@ -4287,9 +4306,9 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, */ comm_my_rank = ompi_comm_rank(msg_ref->comm); - PACK_BUFFER(buffer, msg_ref->comm->c_contextid, 1, ORTE_UINT32, + PACK_BUFFER(buffer, msg_ref->comm->c_contextid, 1, OPAL_UINT32, "crcp:coord: send_msg_details: Unable to pack communicator ID"); - PACK_BUFFER(buffer, comm_my_rank, 1, ORTE_INT, + PACK_BUFFER(buffer, comm_my_rank, 1, OPAL_INT, "crcp:coord: send_msg_details: Unable to pack comm rank ID"); /* @@ -4298,11 +4317,11 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, * - Message count * - Message Datatype size */ - PACK_BUFFER(buffer, msg_ref->tag, 1, ORTE_INT, + PACK_BUFFER(buffer, msg_ref->tag, 1, OPAL_INT, "crcp:coord: send_msg_details: Unable to pack tag"); - PACK_BUFFER(buffer, msg_ref->count, 1, ORTE_SIZE, + PACK_BUFFER(buffer, msg_ref->count, 1, 
OPAL_SIZE, "crcp:coord: send_msg_details: Unable to pack count"); - PACK_BUFFER(buffer, msg_ref->ddt_size, 1, ORTE_SIZE, + PACK_BUFFER(buffer, msg_ref->ddt_size, 1, OPAL_SIZE, "crcp:coord: send_msg_details: Unable to pack datatype size"); /* @@ -4327,7 +4346,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, /* * Check return value from peer to see if we found a match. */ - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = ORTE_ERROR; goto cleanup; } @@ -4339,14 +4358,14 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: do_send_msg_detail: %s --> %s Failed to receive ACK buffer from peer. Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; goto cleanup; } - UNPACK_BUFFER(buffer, req_more, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, req_more, 1, OPAL_UINT32, "crcp:coord: send_msg_details: Failed to unpack the ACK from peer buffer."); /* Mark message as matched */ @@ -4411,7 +4430,7 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_details: %s <-- %s " "Failed to receive message detail from peer. Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4431,7 +4450,7 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_details: %s <-- %s " "Failed to check message detail from peer. 
Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4458,7 +4477,7 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if(OMPI_SUCCESS != (ret = do_recv_msg_detail_resp(peer_ref, response))) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_details: %s <-- %s Failed to respond to peer. Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4475,11 +4494,11 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, int *rank, uint32_t *comm_id, int *tag, size_t *count, size_t *datatype_size) { - orte_buffer_t * buffer = NULL; + opal_buffer_t * buffer = NULL; int exit_status = OMPI_SUCCESS; int ret; - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = ORTE_ERROR; goto cleanup; } @@ -4490,7 +4509,7 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: do_recv_msg_detail: %s <-- %s Failed to receive buffer from peer. 
Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4498,17 +4517,17 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, } /* Pull out the communicator ID */ - UNPACK_BUFFER(buffer, (*comm_id), 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, (*comm_id), 1, OPAL_UINT32, "crcp:coord: recv_msg_details: Failed to unpack the communicator ID"); - UNPACK_BUFFER(buffer, (*rank), 1, ORTE_INT, + UNPACK_BUFFER(buffer, (*rank), 1, OPAL_INT, "crcp:coord: recv_msg_details: Failed to unpack the communicator rank ID"); /* Pull out the message details */ - UNPACK_BUFFER(buffer, (*tag), 1, ORTE_INT, + UNPACK_BUFFER(buffer, (*tag), 1, OPAL_INT, "crcp:coord: recv_msg_details: Failed to unpack the tag"); - UNPACK_BUFFER(buffer, (*count), 1, ORTE_SIZE, + UNPACK_BUFFER(buffer, (*count), 1, OPAL_SIZE, "crcp:coord: recv_msg_details: Failed to unpack the count"); - UNPACK_BUFFER(buffer, (*datatype_size), 1, ORTE_SIZE, + UNPACK_BUFFER(buffer, (*datatype_size), 1, OPAL_SIZE, "crcp:coord: recv_msg_details: Failed to unpack the datatype size"); cleanup: @@ -4552,7 +4571,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s -- %s " "Failed to determine if we have received this message. 
Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4562,7 +4581,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((20, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s -- %s" " found %s, complete %s, posted %s, peer_rank=[%d vs %d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), (true == msg_found ? "True " : "False"), (true == msg_complete ? "True " : "False"), @@ -4580,7 +4599,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s Found a message that needs to be drained\n", - ORTE_NAME_PRINT(orte_process_info.my_name) )); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* * Construct a message for draining @@ -4639,7 +4658,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s " "Found a message already posted! Prepare to drain.\n", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* * If this is the current blocking recv, @@ -4650,7 +4669,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s " "Found a message already posted! 
Prepare to STALL.\n", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); stall_for_completion = true; } /* @@ -4661,7 +4680,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s " "Found a message already posted! No stall required [%3d, %3d, %3d, %3d].\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)current_msg_id, (int)current_msg_type, (int)posted_msg_ref->msg_id, @@ -4679,7 +4698,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, * messages. * JJH -- When do we use this? */ - if (posted_msg_ref->rank != peer_ref->proc_name.vpid) { + if (posted_msg_ref->rank != (int)peer_ref->proc_name.vpid) { posted_msg_ref->suggested_rank = rank; } @@ -4709,7 +4728,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, else { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: ***** ERROR ***** %s Failed to find an action to use. 
This should never happen!\n", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); exit_status = OMPI_ERROR; goto cleanup; } @@ -4989,16 +5008,16 @@ static int find_message_named(opal_list_t * search_list, static int do_recv_msg_detail_resp(ompi_crcp_coord_pml_peer_ref_t *peer_ref, int resp) { - orte_buffer_t * buffer = NULL; + opal_buffer_t * buffer = NULL; int exit_status = OMPI_SUCCESS; int ret; - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = OMPI_ERROR; goto cleanup; } - PACK_BUFFER(buffer, resp, 1, ORTE_UINT32, + PACK_BUFFER(buffer, resp, 1, OPAL_UINT32, "crcp:coord: recv_msg_details: Unable to ask peer for more messages"); if ( 0 > ( ret = orte_rml.send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { @@ -5066,7 +5085,7 @@ static void display_all_timers(int state) { static void display_indv_timer(int idx, int var) { double diff = timer_end[idx] - timer_start[idx]; - if( 0 != orte_process_info.my_name->vpid ) { + if( 0 != ORTE_PROC_MY_NAME->vpid ) { return; } diff --git a/ompi/mca/crcp/coord/crcp_coord_pml.h b/ompi/mca/crcp/coord/crcp_coord_pml.h index 42b2d29316..17237e5cf1 100644 --- a/ompi/mca/crcp/coord/crcp_coord_pml.h +++ b/ompi/mca/crcp/coord/crcp_coord_pml.h @@ -29,7 +29,6 @@ #include "opal/mca/mca.h" #include "ompi/mca/crcp/crcp.h" #include "ompi/communicator/communicator.h" -#include "orte/mca/ns/ns.h" #include "opal/runtime/opal_cr.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" diff --git a/orte/mca/ns/Makefile.am b/ompi/mca/dpm/Makefile.am similarity index 73% rename from orte/mca/ns/Makefile.am rename to ompi/mca/dpm/Makefile.am index e28ba7e551..60a87eace4 100644 --- a/orte/mca/ns/Makefile.am +++ b/ompi/mca/dpm/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana # University 
Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University @@ -17,22 +17,22 @@ # # main library setup -noinst_LTLIBRARIES = libmca_ns.la -libmca_ns_la_SOURCES = +noinst_LTLIBRARIES = libmca_dpm.la +libmca_dpm_la_SOURCES = # header setup -nobase_orte_HEADERS = +nobase_ompi_HEADERS = # local files -headers = ns.h ns_types.h -libmca_ns_la_SOURCES += $(headers) +headers = dpm.h +libmca_dpm_la_SOURCES += $(headers) # Conditionally install the header files if WANT_INSTALL_HEADERS -nobase_orte_HEADERS += $(headers) -ortedir = $(includedir)/openmpi/orte/mca/ns +nobase_ompi_HEADERS += $(headers) +ompidir = $(includedir)/openmpi/ompi/mca/dpm else -ortedir = $(includedir) +ompidir = $(includedir) endif include base/Makefile.am diff --git a/ompi/mca/dpm/base/Makefile.am b/ompi/mca/dpm/base/Makefile.am new file mode 100644 index 0000000000..f6474cdf4c --- /dev/null +++ b/ompi/mca/dpm/base/Makefile.am @@ -0,0 +1,29 @@ +# +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_pkgdata_DATA = base/help-ompi-dpm-base.txt + +headers += \ + base/base.h + +libmca_dpm_la_SOURCES += \ + base/dpm_base_open.c \ + base/dpm_base_close.c \ + base/dpm_base_select.c \ + base/dpm_base_common_fns.c + diff --git a/ompi/mca/dpm/base/base.h b/ompi/mca/dpm/base/base.h new file mode 100644 index 0000000000..d91257ef2e --- /dev/null +++ b/ompi/mca/dpm/base/base.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef OMPI_MCA_DPM_BASE_H +#define OMPI_MCA_DPM_BASE_H + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include "ompi/mca/dpm/dpm.h" + +/* + * Global functions for MCA overall DPM + */ + +BEGIN_C_DECLS + +struct ompi_dpm_base_disconnect_obj { + ompi_communicator_t *comm; + int size; + struct ompi_request_t **reqs; + int buf; +}; +typedef struct ompi_dpm_base_disconnect_obj ompi_dpm_base_disconnect_obj; + +/** + * Initialize the DPM MCA framework + * + * @retval OMPI_SUCCESS Upon success + * @retval OMPI_ERROR Upon failures + * + * This function is invoked during ompi_init(); + */ +OMPI_DECLSPEC int ompi_dpm_base_open(void); + +/** + * Select an available component. 
+ * + * @retval OMPI_SUCCESS Upon Success + * @retval OMPI_NOT_FOUND If no component can be selected + * @retval OMPI_ERROR Upon other failure + * + */ +OMPI_DECLSPEC int ompi_dpm_base_select(void); + +/** + * Finalize the DPM MCA framework + * + * @retval OMPI_SUCCESS Upon success + * @retval OMPI_ERROR Upon failures + * + * This function is invoked during ompi_finalize(); + */ +OMPI_DECLSPEC int ompi_dpm_base_close(void); + +/* Internal support functions */ +OMPI_DECLSPEC char* ompi_dpm_base_dyn_init (void); +OMPI_DECLSPEC int ompi_dpm_base_dyn_finalize (void); +OMPI_DECLSPEC void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm); +OMPI_DECLSPEC ompi_dpm_base_disconnect_obj *ompi_dpm_base_disconnect_init ( ompi_communicator_t *comm); +OMPI_DECLSPEC void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs); + + +/* useful globals */ +OMPI_DECLSPEC extern int ompi_dpm_base_output; +OMPI_DECLSPEC extern opal_list_t ompi_dpm_base_components_available; +OMPI_DECLSPEC extern ompi_dpm_base_component_t ompi_dpm_base_selected_component; +OMPI_DECLSPEC extern ompi_dpm_base_module_t ompi_dpm; + +END_C_DECLS + +#endif /* OMPI_MCA_DPM_BASE_H */ diff --git a/ompi/mca/dpm/base/dpm_base_close.c b/ompi/mca/dpm/base/dpm_base_close.c new file mode 100644 index 0000000000..291b02d408 --- /dev/null +++ b/ompi/mca/dpm/base/dpm_base_close.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "ompi/mca/dpm/dpm.h" +#include "ompi/mca/dpm/base/base.h" + +int ompi_dpm_base_close(void) +{ + /* Close the selected component */ + if( NULL != ompi_dpm.finalize ) { + ompi_dpm.finalize(); + } + + /* Close all available modules that are open */ + mca_base_components_close(ompi_dpm_base_output, + &ompi_dpm_base_components_available, + NULL); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/dpm/base/dpm_base_common_fns.c b/ompi/mca/dpm/base/dpm_base_common_fns.c new file mode 100644 index 0000000000..c98e48a262 --- /dev/null +++ b/ompi/mca/dpm/base/dpm_base_common_fns.c @@ -0,0 +1,279 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2007 University of Houston. All rights reserved. + * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include +#include + +#include "ompi/request/request.h" +#include "ompi/mca/dpm/dpm.h" +#include "ompi/mca/pml/pml.h" + +#include "ompi/mca/dpm/base/base.h" + + +char* ompi_dpm_base_dyn_init (void) +{ + char *envvarname=NULL, *port_name=NULL; + + /* check for appropriate env variable */ + asprintf(&envvarname, "OMPI_PARENT_PORT"); + port_name = getenv(envvarname); + free (envvarname); + + return port_name; +} + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* this routine runs through the list of communicators + and does the disconnect for all dynamic communicators */ +int ompi_dpm_base_dyn_finalize (void) +{ + int i,j=0, max=0; + ompi_dpm_base_disconnect_obj **objs=NULL; + ompi_communicator_t *comm=NULL; + + if ( 1 size = ompi_comm_remote_size (comm); + } else { + obj->size = ompi_comm_size (comm); + } + + obj->comm = comm; + obj->reqs = (ompi_request_t **) malloc(2*obj->size*sizeof(ompi_request_t *)); + if ( NULL == obj->reqs ) { + free (obj); + return NULL; + } + + /* initiate all isend_irecvs. We use a dummy buffer stored on + the object, since we are sending zero size messages anyway. 
*/ + for ( i=0; i < obj->size; i++ ) { + ret = MCA_PML_CALL(irecv (&(obj->buf), 0, MPI_INT, i, + OMPI_COMM_BARRIER_TAG, comm, + &(obj->reqs[2*i]))); + + if ( OMPI_SUCCESS != ret ) { + free (obj->reqs); + free (obj); + return NULL; + } + + ret = MCA_PML_CALL(isend (&(obj->buf), 0, MPI_INT, i, + OMPI_COMM_BARRIER_TAG, + MCA_PML_BASE_SEND_SYNCHRONOUS, + comm, &(obj->reqs[2*i+1]))); + + if ( OMPI_SUCCESS != ret ) { + free (obj->reqs); + free (obj); + return NULL; + } + } + + /* return handle */ + return obj; +} +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* - count how many requests are active + * - generate a request array large enough to hold + all active requests + * - call waitall on the overall request array + * - free the objects + */ +void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs) +{ + + ompi_request_t **reqs=NULL; + char *treq=NULL; + int totalcount = 0; + int i; + int ret; + + for (i=0; isize; + } + + reqs = (ompi_request_t **) malloc (2*totalcount*sizeof(ompi_request_t *)); + if ( NULL == reqs ) { + printf("ompi_comm_disconnect_waitall: error allocating memory\n"); + return; + } + + /* generate a single, large array of pending requests */ + treq = (char *)reqs; + for (i=0; ireqs, 2*objs[i]->size * sizeof(ompi_request_t *)); + treq += 2*objs[i]->size * sizeof(ompi_request_t *); + } + + /* force all non-blocking all-to-alls to finish */ + ret = ompi_request_wait_all (2*totalcount, reqs, MPI_STATUSES_IGNORE); + + /* Finally, free everything */ + for (i=0; i< count; i++ ) { + if (NULL != objs[i]->reqs ) { + free (objs[i]->reqs ); + free (objs[i]); + } + } + + free (reqs); + + /* decrease the counter for dynamic communicators by 'count'. 
+ Attention, this approach now requires, that we are just using + these routines for communicators which have been flagged dynamic */ + ompi_comm_num_dyncomm -=count; + + return; +} + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +#define OMPI_DPM_BASE_MAXJOBIDS 64 +void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm) +{ + int i, j, numjobids=0; + int size, rsize; + int found; + orte_jobid_t jobids[OMPI_DPM_BASE_MAXJOBIDS], thisjobid; + ompi_group_t *grp=NULL; + ompi_proc_t *proc = NULL; + + /* special case for MPI_COMM_NULL */ + if ( comm == MPI_COMM_NULL ) { + return; + } + + size = ompi_comm_size (comm); + rsize = ompi_comm_remote_size(comm); + + /* loop over all processes in local group and count number + of different jobids. */ + grp = comm->c_local_group; + for (i=0; i< size; i++) { + proc = ompi_group_peer_lookup(grp,i); + thisjobid = proc->proc_name.jobid; + found = 0; + for ( j=0; jc_remote_group; + for (i=0; i< rsize; i++) { + proc = ompi_group_peer_lookup(grp,i); + thisjobid = proc->proc_name.jobid; + found = 0; + for ( j=0; j 1 ) { + ompi_comm_num_dyncomm++; + OMPI_COMM_SET_DYNAMIC(comm); + } + + return; +} diff --git a/ompi/mca/dpm/base/dpm_base_open.c b/ompi/mca/dpm/base/dpm_base_open.c new file mode 100644 index 0000000000..420667a1d8 --- /dev/null +++ b/ompi/mca/dpm/base/dpm_base_open.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "ompi/mca/dpm/dpm.h" +#include "ompi/mca/dpm/base/base.h" + +#include "ompi/mca/dpm/base/static-components.h" + +/* + * Globals + */ +OMPI_DECLSPEC int ompi_dpm_base_output = -1; +OMPI_DECLSPEC ompi_dpm_base_module_t ompi_dpm; +opal_list_t ompi_dpm_base_components_available; +ompi_dpm_base_component_t ompi_dpm_base_selected_component; + +/** + * Function for finding and opening either all MCA components, + * or the one that was specifically requested via a MCA parameter. + */ +int ompi_dpm_base_open(void) +{ + /* Debugging/Verbose output */ + ompi_dpm_base_output = opal_output_open(NULL); + + /* Open up all available components */ + if (OPAL_SUCCESS != + mca_base_components_open("dpm", + ompi_dpm_base_output, + mca_dpm_base_static_components, + &ompi_dpm_base_components_available, + true)) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/dpm/base/dpm_base_select.c b/ompi/mca/dpm/base/dpm_base_select.c new file mode 100644 index 0000000000..e4cb3b836c --- /dev/null +++ b/ompi/mca/dpm/base/dpm_base_select.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/base/mca_base_component_repository.h" + +#include "ompi/mca/dpm/dpm.h" +#include "ompi/mca/dpm/base/base.h" + + +int ompi_dpm_base_select(void) +{ + opal_list_item_t *item; + mca_base_component_list_item_t *cli; + ompi_dpm_base_component_t *component, *best_component = NULL; + ompi_dpm_base_module_t *module, *best_module = NULL; + int priority, best_priority = -1; + int rc; + + /* Query all the opened components and see if they want to run */ + + for (item = opal_list_get_first(&ompi_dpm_base_components_available); + opal_list_get_end(&ompi_dpm_base_components_available) != item; + item = opal_list_get_next(item)) { + cli = (mca_base_component_list_item_t *) item; + component = (ompi_dpm_base_component_t *) cli->cli_component; + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + "ompi:dpm:base:select: querying component %s", + component->dpm_version.mca_component_name)); + + /* Call the component's init function and see if it wants to be + selected */ + + module = component->dpm_init(&priority); + + /* If we got a non-NULL module back, then the component wants + to be considered for selection */ + + if (NULL != module) { + /* If this is the best one, save it */ + if (priority > best_priority) { + + /* If there was a previous best one, finalize */ + if (NULL != best_module) { + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + "ompi:dpm:base:select: found better component - finalizing component %s", + best_component->dpm_version.mca_component_name)); + + best_module->finalize(); + } + + /* Save the new best one */ + best_module = module; + best_component = component; + + /* update the best priority */ + best_priority = priority; + } else { + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + 
"ompi:dpm:base:select: component %s does did not win the election", + component->dpm_version.mca_component_name)); + + if (NULL == module->finalize) { + opal_output(ompi_dpm_base_output, + "It appears you are the victim of a stale library - please delete your installation lib directory and reinstall"); + } else { + module->finalize(); + } + } + } + } + + /* If we didn't find one to select, barf */ + + if (NULL == best_component) { + return OMPI_ERROR; + } + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + "ompi:dpm:base:select: component %s was selected", + best_component->dpm_version.mca_component_name)); + + /* We have happiness -- save the component and module for later + usage */ + + ompi_dpm = *best_module; + ompi_dpm_base_selected_component = *best_component; + + /* unload all components that were not selected */ + item = opal_list_get_first(&ompi_dpm_base_components_available); + while(item != opal_list_get_end(&ompi_dpm_base_components_available)) { + opal_list_item_t* next = opal_list_get_next(item); + ompi_dpm_base_component_t* component; + cli = (mca_base_component_list_item_t *) item; + component = (ompi_dpm_base_component_t *) cli->cli_component; + if(component != best_component) { + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + "ompi:dpm:base:select: module %s unloaded", + component->dpm_version.mca_component_name)); + + mca_base_component_repository_release((mca_base_component_t *) component); + opal_list_remove_item(&ompi_dpm_base_components_available, item); + OBJ_RELEASE(item); + } + item = next; + } + + /* init the selected module */ + if (NULL != ompi_dpm.init) { + if (OMPI_SUCCESS != (rc = ompi_dpm.init())) { + return rc; + } + } + return OMPI_SUCCESS; +} diff --git a/orte/mca/sds/lsf/configure.params b/ompi/mca/dpm/base/help-ompi-dpm-base.txt similarity index 72% rename from orte/mca/sds/lsf/configure.params rename to ompi/mca/dpm/base/help-ompi-dpm-base.txt index 8fc44480a6..3ce9616d36 100644 --- a/orte/mca/sds/lsf/configure.params +++ 
b/ompi/mca/dpm/base/help-ompi-dpm-base.txt @@ -1,22 +1,20 @@ -# -*- shell-script -*- + -*- text -*- # -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2007 Los Alamos National Security, LLC. All rights -# reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # - -PARAM_CONFIG_FILES="Makefile" +# This is the US/English general help file for OMPI DPM framework. +# diff --git a/ompi/mca/dpm/dpm.h b/ompi/mca/dpm/dpm.h new file mode 100644 index 0000000000..c0bd9e75b1 --- /dev/null +++ b/ompi/mca/dpm/dpm.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Dynamic Process Management Interface + * + */ + +#ifndef OMPI_MCA_DPM_H +#define OMPI_MCA_DPM_H + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/class/opal_object.h" + +#include "ompi/info/info.h" +#include "ompi/communicator/communicator.h" + +BEGIN_C_DECLS + +/* OMPI port definitions */ +#define OMPI_RML_TAG_BASE ORTE_RML_TAG_MAX + +#define OMPI_RML_TAG_UDAPL OMPI_RML_TAG_BASE+1 +#define OMPI_RML_TAG_OPENIB OMPI_RML_TAG_BASE+2 +#define OMPI_RML_TAG_XOPENIB OMPI_RML_TAG_BASE+3 +#define OMPI_RML_TAG_COMM_CID_INTRA OMPI_RML_TAG_BASE+4 +#define OMPI_RML_TAG_XOOB OMPI_RML_TAG_BASE+5 +#define OMPI_RML_TAG_SM_BACK_FILE_CREATED OMPI_RML_TAG_BASE+6 +#define OMPI_RML_TAG_WIREUP OMPI_RML_TAG_BASE+7 +#define OMPI_CRCP_COORD_BOOKMARK_TAG OMPI_RML_TAG_BASE+8 +#define OMPI_COMM_JOIN_TAG OMPI_RML_TAG_BASE+9 + +#define OMPI_RML_TAG_DYNAMIC OMPI_RML_TAG_BASE+200 + + +/* + * Initialize a module + */ +typedef int (*ompi_dpm_base_module_init_fn_t)(void); + +/* + * Connect/accept communications + */ +typedef int (*ompi_dpm_base_module_connect_accept_fn_t)(ompi_communicator_t *comm, int root, + orte_process_name_t *port, bool send_first, + ompi_communicator_t **newcomm, orte_rml_tag_t tag); + +/** + * Executes internally a disconnect on all dynamic communicators + * in case the user did not disconnect them. + */ +typedef void (*ompi_dpm_base_module_disconnect_fn_t)(ompi_communicator_t *comm); + +/* + * Dynamically spawn processes + */ +typedef int (*ompi_dpm_base_module_spawn_fn_t)(int count, char **array_of_commands, + char ***array_of_argv, + int *array_of_maxprocs, + MPI_Info *array_of_info, + char *port_name); + +/* + * This routine checks, whether an application has been spawned + * by another MPI application, or has been independently started. + * If it has been spawned, it establishes the parent communicator. 
+ * Since the routine has to communicate, it should be among the last + * steps in MPI_Init, to be sure that everything is already set up. + */ +typedef int (*ompi_dpm_base_module_dyn_init_fn_t)(void); + +/* + * Interface for mpi_finalize to call to ensure dynamically spawned procs + * collectively finalize + */ +typedef int (*ompi_dpm_base_module_dyn_finalize_fn_t)(void); + +/* this routine counts the number of different jobids of the processes + given in a certain communicator. If there is more than one jobid, + we mark the communicator as 'dynamic'. This is especially relevant + for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have + to wait for all still connected processes. +*/ +typedef void (*ompi_dpm_base_module_mark_dyncomm_fn_t)(ompi_communicator_t *comm); + +/* + * Open a port to interface to a dynamically spawned job + */ +typedef int (*ompi_dpm_base_module_open_port_fn_t)(char *port_name); + +/* + * Parse a port name to get the contact info and tag + */ +typedef char* (*ompi_dpm_base_module_parse_port_fn_t)(char *port_name, orte_rml_tag_t *tag); + +/* + * Close a port + */ +typedef int (*ompi_dpm_base_module_close_port_fn_t)(char *port_name); + +/* + * Finalize a module + */ +typedef int (*ompi_dpm_base_module_finalize_fn_t)(void); + +/** +* Structure for DPM v1.0.0 modules + */ +struct ompi_dpm_base_module_1_0_0_t { + /** Initialization Function */ + ompi_dpm_base_module_init_fn_t init; + /* connect/accept */ + ompi_dpm_base_module_connect_accept_fn_t connect_accept; + /* disconnect */ + ompi_dpm_base_module_disconnect_fn_t disconnect; + /* spawn processes */ + ompi_dpm_base_module_spawn_fn_t spawn; + /* dyn_init */ + ompi_dpm_base_module_dyn_init_fn_t dyn_init; + /* dyn_finalize */ + ompi_dpm_base_module_dyn_finalize_fn_t dyn_finalize; + /* mark dyncomm */ + ompi_dpm_base_module_mark_dyncomm_fn_t mark_dyncomm; + /* open port */ + ompi_dpm_base_module_open_port_fn_t open_port; + /* parse port */ + ompi_dpm_base_module_parse_port_fn_t 
parse_port; + /* close port */ + ompi_dpm_base_module_close_port_fn_t close_port; + /* finalize */ + ompi_dpm_base_module_finalize_fn_t finalize; +}; +typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_1_0_0_t; +typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_t; + +OMPI_DECLSPEC extern ompi_dpm_base_module_t ompi_dpm; + + +typedef struct ompi_dpm_base_module_1_0_0_t* +(*ompi_dpm_base_component_init_fn_t)(int *priority); + + +/** + * Structure for DPM v1.0.0 components. + */ +struct ompi_dpm_base_component_1_0_0_t { + /** MCA base component */ + mca_base_component_t dpm_version; + /** MCA base data */ + mca_base_component_data_1_0_0_t dpm_data; + /* component selection */ + ompi_dpm_base_component_init_fn_t dpm_init; +}; +typedef struct ompi_dpm_base_component_1_0_0_t ompi_dpm_base_component_1_0_0_t; +typedef struct ompi_dpm_base_component_1_0_0_t ompi_dpm_base_component_t; + +/** + * Macro for use in components that are of type CRCP v1.0.0 + */ +#define OMPI_DPM_BASE_VERSION_1_0_0 \ + /* DPM v1.0 is chained to MCA v1.0 */ \ + MCA_BASE_VERSION_1_0_0, \ + /* DPM v1.0 */ \ + "dpm", 1, 0, 0 + + +END_C_DECLS + +#endif /* OMPI_MCA_DPM_H */ diff --git a/ompi/mca/dpm/orte/Makefile.am b/ompi/mca/dpm/orte/Makefile.am new file mode 100644 index 0000000000..57f9d04d2f --- /dev/null +++ b/ompi/mca/dpm/orte/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +dist_pkgdata_DATA = help-ompi-dpm-orte.txt + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if OMPI_BUILD_dpm_orte_DSO +component_noinst = +component_install = mca_dpm_orte.la +else +component_noinst = libmca_dpm_orte.la +component_install = +endif + +local_sources = \ + dpm_orte.c \ + dpm_orte.h \ + dpm_orte_component.c + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_dpm_orte_la_SOURCES = $(local_sources) +mca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_dpm_orte_la_SOURCES = $(local_sources) +libmca_dpm_orte_la_LIBADD = $(dpm_orte_LIBS) +libmca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS) + diff --git a/orte/mca/errmgr/bproc/configure.params b/ompi/mca/dpm/orte/configure.params similarity index 100% rename from orte/mca/errmgr/bproc/configure.params rename to ompi/mca/dpm/orte/configure.params diff --git a/ompi/mca/dpm/orte/dpm_orte.c b/ompi/mca/dpm/orte/dpm_orte.c new file mode 100644 index 0000000000..d3f8440556 --- /dev/null +++ b/ompi/mca/dpm/orte/dpm_orte.c @@ -0,0 +1,920 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include +#include +#include + +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/util/opal_getcwd.h" + +#include "opal/dss/dss.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/grpcomm/grpcomm.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/routed/routed.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_data_server.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/proc/proc.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/info/info.h" +#include "ompi/runtime/ompi_module_exchange.h" + +#include "ompi/mca/dpm/base/base.h" +#include "dpm_orte.h" + +/* Local static variables */ +static opal_mutex_t ompi_dpm_port_mutex; +static orte_rml_tag_t next_tag; + + +/* + * Init the module + */ +static int init(void) +{ + OBJ_CONSTRUCT(&ompi_dpm_port_mutex, opal_mutex_t); + next_tag = OMPI_RML_TAG_DYNAMIC; + + return OMPI_SUCCESS; +} + +static int get_rport (orte_process_name_t *port, + int send_first, struct ompi_proc_t *proc, + orte_rml_tag_t tag, orte_process_name_t *rport); + + +static int connect_accept ( ompi_communicator_t *comm, int root, + orte_process_name_t *port, bool send_first, + ompi_communicator_t **newcomm, orte_rml_tag_t tag ) +{ + int size, rsize, rank, rc; + orte_std_cntr_t num_vals; + orte_std_cntr_t rnamebuflen = 0; + int rnamebuflen_int = 0; + void *rnamebuf=NULL; + + ompi_communicator_t *newcomp=MPI_COMM_NULL; + ompi_proc_t **rprocs=NULL; + ompi_group_t *group=comm->c_local_group; + orte_process_name_t *rport=NULL, tmp_port_name; + opal_buffer_t *nbuf=NULL, *nrbuf=NULL; + ompi_proc_t **proc_list=NULL, **new_proc_list; + int i,j, new_proc_len; + ompi_group_t *new_group_pointer; + + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + 
"%s dpm:orte:connect_accept with port %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(port))); + + size = ompi_comm_size ( comm ); + rank = ompi_comm_rank ( comm ); + + /* tell the progress engine to tick the event library more + often, to make sure that the OOB messages get sent */ + opal_progress_event_users_increment(); + + if ( rank == root ) { + /* The process receiving first does not have yet the contact + information of the remote process. Therefore, we have to + exchange that. + */ + + if(!OMPI_GROUP_IS_DENSE(group)) { + proc_list = (ompi_proc_t **) calloc (group->grp_proc_count, + sizeof (ompi_proc_t *)); + for(i=0 ; igrp_proc_count ; i++) + proc_list[i] = ompi_group_peer_lookup(group,i); + } + + if ( OMPI_COMM_JOIN_TAG != tag ) { + if(OMPI_GROUP_IS_DENSE(group)){ + rc = get_rport(port,send_first, + group->grp_proc_pointers[rank], tag, + &tmp_port_name); + } + else { + rc = get_rport(port,send_first, + proc_list[rank], tag, + &tmp_port_name); + } + if (OMPI_SUCCESS != rc) { + return rc; + } + rport = &tmp_port_name; + } else { + rport = port; + } + + /* Generate the message buffer containing the number of processes and the list of + participating processes */ + nbuf = OBJ_NEW(opal_buffer_t); + if (NULL == nbuf) { + return OMPI_ERROR; + } + + if (ORTE_SUCCESS != (rc = opal_dss.pack(nbuf, &size, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + + if(OMPI_GROUP_IS_DENSE(group)) { + ompi_proc_pack(group->grp_proc_pointers, size, nbuf); + } + else { + ompi_proc_pack(proc_list, size, nbuf); + } + + nrbuf = OBJ_NEW(opal_buffer_t); + if (NULL == nrbuf ) { + rc = OMPI_ERROR; + goto exit; + } + + /* Exchange the number and the list of processes in the groups */ + if ( send_first ) { + rc = orte_rml.send_buffer(rport, nbuf, tag, 0); + rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); + } else { + rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); + rc = orte_rml.send_buffer(rport, nbuf, tag, 0); + } + + if (ORTE_SUCCESS != (rc = 
opal_dss.unload(nrbuf, &rnamebuf, &rnamebuflen))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + } + + /* First convert the size_t to an int so we can cast in the bcast to a void * + * if we don't then we will get badness when using big vs little endian + * THIS IS NO LONGER REQUIRED AS THE LENGTH IS NOW A STD_CNTR_T, WHICH + * CORRELATES TO AN INT32 + */ + rnamebuflen_int = (int)rnamebuflen; + + /* bcast the buffer-length to all processes in the local comm */ + rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm, + comm->c_coll.coll_bcast_module); + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + rnamebuflen = rnamebuflen_int; + + if ( rank != root ) { + /* non root processes need to allocate the buffer manually */ + rnamebuf = (char *) malloc(rnamebuflen); + if ( NULL == rnamebuf ) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + + /* bcast list of processes to all procs in local group + and reconstruct the data. Note that proc_get_proclist + adds processes, which were not known yet to our + process pool. 
+ */ + rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm, + comm->c_coll.coll_bcast_module); + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + + nrbuf = OBJ_NEW(opal_buffer_t); + if (NULL == nrbuf) { + goto exit; + } + if ( ORTE_SUCCESS != ( rc = opal_dss.load(nrbuf, rnamebuf, rnamebuflen))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + + num_vals = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(nrbuf, &rsize, &num_vals, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + + rc = ompi_proc_unpack(nrbuf, rsize, &rprocs, &new_proc_len, &new_proc_list); + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + + /* If we added new procs, we need to do the modex and then call + PML add_procs */ + if (new_proc_len > 0) { + opal_list_t all_procs; + orte_namelist_t *name; + + OBJ_CONSTRUCT(&all_procs, opal_list_t); + + if (send_first) { + for (i = 0 ; i < group->grp_proc_count ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = ompi_group_peer_lookup(group, i)->proc_name; + opal_list_append(&all_procs, &name->item); + } + + for (i = 0 ; i < rsize ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = rprocs[i]->proc_name; + opal_list_append(&all_procs, &name->item); + } + } else { + for (i = 0 ; i < rsize ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = rprocs[i]->proc_name; + opal_list_append(&all_procs, &name->item); + } + + for (i = 0 ; i < group->grp_proc_count ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = ompi_group_peer_lookup(group, i)->proc_name; + opal_list_append(&all_procs, &name->item); + } + } + + if (OMPI_SUCCESS != (rc = orte_grpcomm.modex(&all_procs))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + + /* + while (NULL != (item = opal_list_remove_first(&all_procs))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&all_procs); + */ + + MCA_PML_CALL(add_procs(new_proc_list, new_proc_len)); + } + + OBJ_RELEASE(nrbuf); + if ( rank == root ) { + OBJ_RELEASE(nbuf); + } + + new_group_pointer=ompi_group_allocate(rsize); + if( 
NULL == new_group_pointer ) { + return MPI_ERR_GROUP; + } + + /* put group elements in the list */ + for (j = 0; j < rsize; j++) { + new_group_pointer->grp_proc_pointers[j] = rprocs[j]; + } /* end proc loop */ + + /* increment proc reference counters */ + ompi_group_increment_proc_count(new_group_pointer); + + /* set up communicator structure */ + rc = ompi_comm_set ( &newcomp, /* new comm */ + comm, /* old comm */ + group->grp_proc_count, /* local_size */ + NULL, /* local_procs */ + rsize, /* remote_size */ + NULL , /* remote_procs */ + NULL, /* attrs */ + comm->error_handler, /* error handler */ + NULL, /* topo component */ + group, /* local group */ + new_group_pointer /* remote group */ + ); + if ( NULL == newcomp ) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + ompi_group_decrement_proc_count (new_group_pointer); + OBJ_RELEASE(new_group_pointer); + new_group_pointer = MPI_GROUP_NULL; + + /* allocate comm_cid */ + rc = ompi_comm_nextcid ( newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + rport, /* remote leader */ + OMPI_COMM_CID_INTRA_OOB, /* mode */ + send_first ); /* send or recv first */ + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + + /* activate comm and init coll-component */ + rc = ompi_comm_activate ( newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + rport, /* remote leader */ + OMPI_COMM_CID_INTRA_OOB, /* mode */ + send_first, /* send or recv first */ + 0); /* sync_flag */ + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + + /* Question: do we have to re-start some low level stuff + to enable the usage of fast communication devices + between the two worlds ? 
+ */ + + + exit: + /* done with OOB and such - slow our tick rate again */ + opal_progress(); + opal_progress_event_users_decrement(); + + if ( NULL != rprocs ) { + free ( rprocs ); + } + if ( NULL != proc_list ) { + free ( proc_list ); + } + if ( OMPI_SUCCESS != rc ) { + if ( MPI_COMM_NULL != newcomp && NULL != newcomp ) { + OBJ_RETAIN(newcomp); + newcomp = MPI_COMM_NULL; + } + } + + *newcomm = newcomp; + return rc; +} + +static void disconnect(ompi_communicator_t *comm) +{ + ompi_dpm_base_disconnect_obj *dobj; + + dobj = ompi_dpm_base_disconnect_init (comm); + ompi_dpm_base_disconnect_waitall(1, &dobj); + +} + + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* + * This routine is necessary, since in the connect/accept case, the processes + * executing the connect operation have the OOB contact information of the + * leader of the remote group, however, the processes executing the + * accept get their own port_name = OOB contact information passed in as + * an argument. This is however useless. + * + * Therefore, the two root processes exchange this information at this + * point. 
+ * + */ +int get_rport(orte_process_name_t *port, int send_first, + ompi_proc_t *proc, orte_rml_tag_t tag, + orte_process_name_t *rport_name) +{ + int rc; + orte_std_cntr_t num_vals; + + if ( send_first ) { + opal_buffer_t *sbuf; + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:get_rport sending to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(port))); + + sbuf = OBJ_NEW(opal_buffer_t); + if (NULL == sbuf) { + return OMPI_ERROR; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(sbuf, &(proc->proc_name), 1, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(sbuf); + return rc; + } + + rc = orte_rml.send_buffer(port, sbuf, tag, 0); + OBJ_RELEASE(sbuf); + if ( 0 > rc ) { + ORTE_ERROR_LOG(rc); + return rc; + } + + *rport_name = *port; + } else { + opal_buffer_t *rbuf; + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:get_rport waiting to recv", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + rbuf = OBJ_NEW(opal_buffer_t); + if (NULL == rbuf) { + return ORTE_ERROR; + } + if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, rbuf, tag, 0))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(rbuf); + return rc; + } + + num_vals = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(rbuf, rport_name, &num_vals, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(rbuf); + return rc; + } + OBJ_RELEASE(rbuf); + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:get_rport recv'd name %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(rport_name))); + } + + return OMPI_SUCCESS; +} + + +static int spawn(int count, char **array_of_commands, + char ***array_of_argv, + int *array_of_maxprocs, + MPI_Info *array_of_info, + char *port_name) +{ + int rc, i, j, counter; + int have_wdir=0; + bool have_prefix; + int valuelen=OMPI_PATH_MAX, flag=0; + char cwd[OMPI_PATH_MAX]; + char host[OMPI_PATH_MAX]; /*** should define OMPI_HOST_MAX ***/ + char prefix[OMPI_PATH_MAX]; + char *base_prefix=NULL; + + orte_job_t *jdata; + orte_std_cntr_t 
dummy; + orte_app_context_t *app; + + bool timing = false; + struct timeval ompistart, ompistop; + + /* parse the info object */ + /* check potentially for: + - "host": desired host where to spawn the processes + - "hostfile": hostfile containing hosts where procs are + to be spawned + - "add-host": add the specified hosts to the known list + of available resources and spawn these + procs on them + - "add-hostfile": add the hosts in the hostfile to the + known list of available resources and spawn + these procs on them + - "prefix": the path to the root of the directory tree where ompi + executables and libraries can be found on all nodes + used to spawn these procs + - "arch": desired architecture + - "wdir": directory, where executable can be found + - "path": list of directories where to look for the executable + - "file": filename, where additional information is provided. + - "soft": see page 92 of MPI-2. + */ + + /* make sure the progress engine properly trips the event library */ + opal_progress_event_users_increment(); + + /* setup the job object */ + jdata = OBJ_NEW(orte_job_t); + + /* Convert the list of commands to an array of orte_app_context_t + pointers */ + for (i = 0; i < count; ++i) { + app = OBJ_NEW(orte_app_context_t); + if (NULL == app) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + /* add the app to the job data */ + orte_pointer_array_add(&dummy, jdata->apps, app); + jdata->num_apps++; + + /* copy over the name of the executable */ + app->app = strdup(array_of_commands[i]); + if (NULL == app->app) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + /* record the number of procs to be generated */ + app->num_procs = array_of_maxprocs[i]; + + /* copy over the argv array */ + counter = 1; + + if (MPI_ARGVS_NULL != array_of_argv && + MPI_ARGV_NULL != 
array_of_argv[i]) { + /* first need to find out how many entries there are */ + j=0; + while (NULL != array_of_argv[i][j]) { + j++; + } + counter += j; + } + + /* now copy them over, ensuring to NULL terminate the array */ + app->argv = (char**)malloc((1 + counter) * sizeof(char*)); + if (NULL == app->argv) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + app->argv[0] = strdup(array_of_commands[i]); + for (j=1; j < counter; j++) { + app->argv[j] = strdup(array_of_argv[i][j-1]); + } + app->argv[counter] = NULL; + + + /* the environment gets set by the launcher + * all we need to do is add the specific values + * needed for comm_spawn + */ + /* Add environment variable with the contact information for the + child processes. + */ + counter = 1; + app->env = (char**)malloc((1+counter) * sizeof(char*)); + if (NULL == app->env) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + asprintf(&(app->env[0]), "OMPI_PARENT_PORT=%s", port_name); + app->env[1] = NULL; + for (j = 0; NULL != environ[j]; ++j) { + if (0 == strncmp("OMPI_", environ[j], 5)) { + opal_argv_append_nosize(&app->env, environ[j]); + } + } + + /* Check for well-known info keys */ + have_wdir = 0; + have_prefix = false; + if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { + + /* check for 'host' */ + ompi_info_get (array_of_info[i], "host", sizeof(host), host, &flag); + if ( flag ) { + app->num_map = 1; + app->map_data = (orte_app_context_map_t **) malloc(sizeof(orte_app_context_map_t *)); + app->map_data[0] = OBJ_NEW(orte_app_context_map_t); + app->map_data[0]->map_type = ORTE_APP_CONTEXT_MAP_HOSTNAME; + app->map_data[0]->map_data = strdup(host); + } + + /* check for 'hostfile' */ + ompi_info_get (array_of_info[i], "hostfile", sizeof(host), host, &flag); + if ( flag ) { + app->hostfile = 
strdup(host); + } + + /* check for 'add-host' */ + ompi_info_get (array_of_info[i], "add-host", sizeof(host), host, &flag); + if ( flag ) { + app->num_map = 1; + app->map_data = (orte_app_context_map_t **) malloc(sizeof(orte_app_context_map_t *)); + app->map_data[0] = OBJ_NEW(orte_app_context_map_t); + app->map_data[0]->map_type = ORTE_APP_CONTEXT_MAP_ADD_HOSTNAME; + app->map_data[0]->map_data = strdup(host); + } + + /* check for 'add-hostfile' */ + ompi_info_get (array_of_info[i], "add-hostfile", sizeof(host), host, &flag); + if ( flag ) { + app->add_hostfile = strdup(host); + } + + /* 'path', 'arch', 'file', 'soft' -- to be implemented */ + + /* check for 'ompi_prefix' (OMPI-specific -- to effect the same + * behavior as --prefix option to orterun) + */ + ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix), prefix, &flag); + if ( flag ) { + app->prefix_dir = strdup(prefix); + have_prefix = true; + } + + /* check for 'wdir' */ + ompi_info_get (array_of_info[i], "wdir", valuelen, cwd, &flag); + if ( flag ) { + app->cwd = strdup(cwd); + have_wdir = 1; + } + + /* check for 'ompi_local_slave' - OMPI-specific -- indicates that + * the specified app is to be launched by the local orted as a + * "slave" process, typically to support an attached co-processor + */ + ompi_info_get_bool(array_of_info[i], "ompi_local_slave", &jdata->local_spawn, &flag); + + } + + /* default value: If the user did not tell us where to look for the + executable, we assume the current working directory */ + if ( !have_wdir ) { + if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OMPI_PATH_MAX))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return rc; + } + app->cwd = strdup(cwd); + } + + /* if the user told us a new prefix, then we leave it alone. 
otherwise, if + * a prefix had been provided before, copy that one into the new app_context + * for use by the spawned children + */ + if ( !have_prefix && NULL != base_prefix) { + app->prefix_dir = strdup(base_prefix); + } + + /* leave the map info alone - the launcher will + * decide where to put things + */ + } /* for (i = 0 ; i < count ; ++i) */ + + /* cleanup */ + if (NULL != base_prefix) { + free(base_prefix); + } + + /* check for timing request - get stop time and report elapsed time if so */ + if (timing) { + if (0 != gettimeofday(&ompistop, NULL)) { + opal_output(0, "ompi_comm_start_procs: could not obtain stop time"); + } else { + opal_output(0, "ompi_comm_start_procs: time from start to prepare to spawn %ld usec", + (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + + (ompistop.tv_usec - ompistart.tv_usec))); + if (0 != gettimeofday(&ompistart, NULL)) { + opal_output(0, "ompi_comm_start_procs: could not obtain new start time"); + ompistart.tv_sec = ompistop.tv_sec; + ompistart.tv_usec = ompistop.tv_usec; + } + } + } + + /* spawn procs */ + rc = orte_plm.spawn(jdata); + OBJ_RELEASE(jdata); + + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + opal_progress_event_users_decrement(); + return MPI_ERR_SPAWN; + } + + /* check for timing request - get stop time and report elapsed time if so */ + if (timing) { + if (0 != gettimeofday(&ompistop, NULL)) { + opal_output(0, "ompi_comm_start_procs: could not obtain stop time"); + } else { + opal_output(0, "ompi_comm_start_procs: time to spawn %ld usec", + (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + + (ompistop.tv_usec - ompistart.tv_usec))); + } + } + + /* clean up */ + opal_progress_event_users_decrement(); + + return OMPI_SUCCESS; +} + +static int open_port(char *port_name) +{ + char *rml_uri, *ptr, tag[12]; + int rc; + + /* + * The port_name is equal to the OOB-contact information + * and an RML tag. The reason for adding the tag is + * to make the port unique for multi-threaded scenarios. 
+ */ + + if (NULL == (rml_uri = orte_rml.get_contact_info())) { + return OMPI_ERR_NOT_AVAILABLE; + } + + sprintf(tag, "%d", (int)next_tag); + + /* if the overall port name is too long, we try to truncate the rml uri */ + rc = 0; + while ((strlen(rml_uri)+strlen(tag)) > (MPI_MAX_PORT_NAME-2)) { + /* if we have already tried several times, punt! */ + if (4 < rc) { + free(rml_uri); + return OMPI_ERROR; + } + /* find the trailing uri and truncate there */ + ptr = strrchr(rml_uri, ';'); + *ptr = '\0'; + ++rc; + } + + OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); + sprintf (port_name, "%s:%s", rml_uri, tag); + next_tag++; + OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); + + free ( rml_uri ); + + return OMPI_SUCCESS; +} + +/* takes a port_name and separates it into the RML URI +* and the tag +*/ +static char *parse_port (char *port_name, orte_rml_tag_t *tag) +{ + char *tmp_string, *ptr; + + /* find the ':' demarking the RML tag we added to the end */ + if (NULL == (ptr = strrchr(port_name, ':'))) { + return NULL; + } + + /* terminate the port_name at that location */ + *ptr = '\0'; + ptr++; + + /* convert the RML tag */ + sscanf(ptr,"%d", (int*)tag); + + /* see if the length of the RML uri is too long - if so, + * truncate it + */ + if (strlen(port_name) > MPI_MAX_PORT_NAME) { + port_name[MPI_MAX_PORT_NAME] = '\0'; + } + + /* copy the RML uri so we can return a malloc'd value + * that can later be free'd + */ + tmp_string = strdup(port_name); + + return tmp_string; +} + +static int close_port(char *port_name) +{ + return OMPI_SUCCESS; +} + +static int dyn_init(void) +{ + char *oob_port=NULL; + char *port_name=NULL; + int root=0, rc; + bool send_first = true; + orte_rml_tag_t tag; + ompi_communicator_t *newcomm=NULL; + orte_process_name_t port_proc_name; + ompi_group_t *group = NULL; + ompi_errhandler_t *errhandler = NULL; + + ompi_communicator_t *oldcomm; + + /* if env-variable is set, we are a dynamically spawned + * child - parse port and call comm_connect_accept */ + if (NULL == 
(port_name = ompi_dpm_base_dyn_init())) { + /* nothing to do */ + return OMPI_SUCCESS; + } + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:dyn_init with port %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + port_name)); + + /* split the content of the environment variable into + its pieces, which are RML-uri:tag */ + oob_port = parse_port (port_name, &tag); + + /* set the contact info into the local hash table */ + if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(oob_port))) { + ORTE_ERROR_LOG(rc); + free(oob_port); + return(rc); + } + + /* process the RML uri to get the port's process name */ + if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(oob_port, &port_proc_name, NULL))) { + ORTE_ERROR_LOG(rc); + free(oob_port); + return rc; + } + free(oob_port); /* done with this */ + + /* update the route to this process - in this case, we always give it + * as direct since we were given the contact info. We trust the + * selected routed component to do the Right Thing for its own mode + * of operation + */ + if (ORTE_SUCCESS != (rc = orte_routed.update_route(&port_proc_name, &port_proc_name))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:dyn_init calling connect_accept to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&port_proc_name))); + + rc = connect_accept (MPI_COMM_WORLD, root, &port_proc_name, + send_first, &newcomm, tag ); + if (OMPI_SUCCESS != rc) { + return rc; + } + + /* Set the parent communicator */ + ompi_mpi_comm_parent = newcomm; + + /* originally, we set comm_parent to comm_null (in comm_init), + * now we have to decrease the reference counters to the according + * objects + */ + + oldcomm = &ompi_mpi_comm_null; + OBJ_RELEASE(oldcomm); + group = &ompi_mpi_group_null; + OBJ_RELEASE(group); + errhandler = &ompi_mpi_errors_are_fatal; + OBJ_RELEASE(errhandler); + + /* Set name for debugging purposes */ + snprintf(newcomm->c_name, MPI_MAX_OBJECT_NAME, 
/*
 * finalize the module
 *
 * Destructs the port mutex (the same mutex open_port locks around its
 * use of next_tag) and always reports OMPI_SUCCESS.
 */
static int finalize(void)
{
    OBJ_DESTRUCT(&ompi_dpm_port_mutex);
    return OMPI_SUCCESS;
}

/*
 * instantiate the module
 *
 * The initializer is positional: each entry fills the next member of
 * ompi_dpm_base_module_t in declaration order, so the order below must
 * not be changed independently of that struct.  Entries prefixed
 * ompi_dpm_base_ are not defined in this file.
 */
ompi_dpm_base_module_t ompi_dpm_orte_module = {
    init,
    connect_accept,
    disconnect,
    spawn,
    dyn_init,
    ompi_dpm_base_dyn_finalize,
    ompi_dpm_base_mark_dyncomm,
    open_port,
    parse_port,
    close_port,
    finalize
};
#include "ompi_config.h"
#include "ompi/constants.h"

#include "dpm_orte.h"

/* component entry points; defined at the bottom of this file */
static int dpm_orte_component_open(void);
static int dpm_orte_component_close(void);
static ompi_dpm_base_module_t* dpm_orte_component_init( int* priority );

/*
 * Public component descriptor for the "orte" dpm component.  The
 * initializer is positional, so the order of entries must follow the
 * layout of ompi_dpm_base_component_t.
 */
ompi_dpm_base_component_t mca_dpm_orte_component = {
    /* First, the mca_base_component_t struct containing meta
       information about the component itself */

    {
        /* Indicate that we are a dpm v1.0.0 component (which also implies
           a specific MCA version) */

        OMPI_DPM_BASE_VERSION_1_0_0,

        "orte", /* MCA component name */
        OMPI_MAJOR_VERSION,  /* MCA component major version */
        OMPI_MINOR_VERSION,  /* MCA component minor version */
        OMPI_RELEASE_VERSION,  /* MCA component release version */
        dpm_orte_component_open,  /* component open */
        dpm_orte_component_close /* component close */
    },

    /* Next the MCA v1.0.0 component meta data */

    {
        /* This component is checkpoint ready */
        MCA_BASE_METADATA_PARAM_CHECKPOINT
    },

    dpm_orte_component_init,  /* component init */
};


/* Component open: nothing to set up, always succeeds. */
int dpm_orte_component_open(void)
{
    return OMPI_SUCCESS;
}

/* Component close: nothing to tear down, always succeeds. */
int dpm_orte_component_close(void)
{
    return OMPI_SUCCESS;
}

/*
 * Component init: reports a fixed priority of 50 through `priority`
 * and hands back the (statically allocated) orte dpm module.
 */
ompi_dpm_base_module_t* dpm_orte_component_init(int* priority)
{
    *priority = 50;

    return &ompi_dpm_orte_module;
}
Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open MPI. +# +[dpm-orte:no-server] +Process rank %ld attempted to %s a global ompi_server that +could not be contacted. This is typically caused by either not +specifying the contact info for the server, or by the server not +currently executing. If you did specify the contact info for a +server, please check to see that the server is running and start +it again (or have your sys admin start it) if it isn't. + +[dpm-orte:unknown-order] +Process rank %ld attempted to lookup a value but provided an +unrecognized order parameter. Order parameters are used to tell Open +MPI if it should first look for the requested value locally (i.e., from +the current job) or from a global ompi_server. Accepted order +parameters are "local" and "global", respectively. + +[dpm-orte:too-many-orders] +Process rank %ld attempted to lookup a value but provided too many +order parameters (%ld found). Order parameters are used to tell +Open MPI if it should first look for the requested value locally +(i.e., from the current job) or from a global ompi_server. Accepted +order parameters are "local" and "global", respectively, and each can +only be specified once. 
diff --git a/orte/mca/ns/replica/ns_replica.c b/ompi/mca/io/romio/romio/confdb/.hgfoo similarity index 100% rename from orte/mca/ns/replica/ns_replica.c rename to ompi/mca/io/romio/romio/confdb/.hgfoo diff --git a/ompi/mca/mpool/base/mpool_base_mem_cb.c b/ompi/mca/mpool/base/mpool_base_mem_cb.c index eae53a291b..69cb42bb4d 100644 --- a/ompi/mca/mpool/base/mpool_base_mem_cb.c +++ b/ompi/mca/mpool/base/mpool_base_mem_cb.c @@ -23,8 +23,8 @@ #include "opal/util/output.h" #include "mpool_base_mem_cb.h" #include "base.h" +#include "orte/types.h" #include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" opal_pointer_array_t mca_mpool_base_mem_cb_array; diff --git a/ompi/mca/mpool/base/mpool_base_tree.c b/ompi/mca/mpool/base/mpool_base_tree.c index 984ff76f49..26f3ee31ac 100644 --- a/ompi/mca/mpool/base/mpool_base_tree.c +++ b/ompi/mca/mpool/base/mpool_base_tree.c @@ -26,9 +26,10 @@ #include "opal/mca/mca.h" #include "opal/util/show_help.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/util/name_fns.h" #include "orte/util/proc_info.h" #include "orte/util/sys_info.h" +#include "orte/runtime/orte_globals.h" #include "ompi/runtime/params.h" #include "mpool_base_tree.h" @@ -172,13 +173,13 @@ void mca_mpool_base_tree_print(void) if (num_leaks <= ompi_debug_show_mpi_alloc_mem_leaks || ompi_debug_show_mpi_alloc_mem_leaks < 0) { opal_show_help("help-mpool-base.txt", "all mem leaks", - true, ORTE_NAME_PRINT(orte_process_info.my_name), + true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_system_info.nodename, orte_process_info.pid, leak_msg); } else { int i = num_leaks - ompi_debug_show_mpi_alloc_mem_leaks; opal_show_help("help-mpool-base.txt", "some mem leaks", - true, ORTE_NAME_PRINT(orte_process_info.my_name), + true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_system_info.nodename, orte_process_info.pid, leak_msg, i, (i > 1) ? 
"s were" : " was", diff --git a/ompi/mca/mpool/rdma/mpool_rdma_module.c b/ompi/mca/mpool/rdma/mpool_rdma_module.c index c2d2fe8547..208d8d00e4 100644 --- a/ompi/mca/mpool/rdma/mpool_rdma_module.c +++ b/ompi/mca/mpool/rdma/mpool_rdma_module.c @@ -25,6 +25,8 @@ #include "ompi_config.h" #include "opal/include/opal/align.h" #include "orte/util/proc_info.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" #include "opal/util/output.h" #include "ompi/mca/mpool/rdma/mpool_rdma.h" #include @@ -431,7 +433,7 @@ void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool) if(true == mca_mpool_rdma_component.print_stats) { opal_output(0, "%s rdma: stats " "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mpool_rdma->stat_cache_hit, mpool_rdma->stat_cache_miss, mpool_rdma->stat_cache_found, mpool_rdma->stat_cache_notfound, mpool_rdma->stat_evicted); diff --git a/ompi/mca/mtl/mx/mtl_mx_endpoint.c b/ompi/mca/mtl/mx/mtl_mx_endpoint.c index a4aad8ed6d..dacf18217c 100644 --- a/ompi/mca/mtl/mx/mtl_mx_endpoint.c +++ b/ompi/mca/mtl/mx/mtl_mx_endpoint.c @@ -21,10 +21,6 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/errmgr/errmgr.h" #include "opal/util/output.h" #include "mtl_mx.h" #include "mtl_mx_types.h" diff --git a/ompi/mca/pml/base/pml_base_select.c b/ompi/mca/pml/base/pml_base_select.c index 9d994e1014..c8b9c7bf85 100644 --- a/ompi/mca/pml/base/pml_base_select.c +++ b/ompi/mca/pml/base/pml_base_select.c @@ -25,7 +25,11 @@ #include "opal/runtime/opal_progress.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" + #include "orte/mca/errmgr/errmgr.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" @@ -165,10 +169,10 @@ 
int mca_pml_base_select(bool enable_progress_threads, if( NULL == tmp_val) { continue; } - orte_errmgr.error_detected(1, "PML %s cannot be selected", tmp_val, NULL); + orte_errmgr.abort(1, "PML %s cannot be selected", tmp_val); } if(0 == i) { - orte_errmgr.error_detected(2, "No pml component available. This shouldn't happen.", NULL); + orte_errmgr.abort(2, "No pml component available. This shouldn't happen."); } } diff --git a/ompi/mca/pml/crcpw/pml_crcpw_module.c b/ompi/mca/pml/crcpw/pml_crcpw_module.c index 861ccdc6ee..a0a75f81ed 100644 --- a/ompi/mca/pml/crcpw/pml_crcpw_module.c +++ b/ompi/mca/pml/crcpw/pml_crcpw_module.c @@ -27,7 +27,6 @@ #include "ompi/mca/btl/base/base.h" #include "ompi/mca/pml/crcpw/pml_crcpw.h" #include "ompi/mca/bml/base/base.h" -#include "orte/mca/ns/ns.h" #include "orte/mca/errmgr/errmgr.h" #include "ompi/class/ompi_free_list.h" diff --git a/ompi/mca/pml/dr/pml_dr.c b/ompi/mca/pml/dr/pml_dr.c index e311447889..2b48891301 100644 --- a/ompi/mca/pml/dr/pml_dr.c +++ b/ompi/mca/pml/dr/pml_dr.c @@ -34,7 +34,8 @@ #include "pml_dr_sendreq.h" #include "pml_dr_recvreq.h" #include "ompi/mca/bml/base/base.h" -#include "orte/mca/ns/ns.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "ompi/mca/pml/base/base.h" @@ -241,9 +242,9 @@ int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs) /* this won't work for comm spawn and other dynamic processes, but will work for initial job start */ idx = opal_pointer_array_add(&mca_pml_dr.endpoints, (void*) endpoint); - if(orte_ns.compare_fields(ORTE_NS_CMP_ALL, - orte_process_info.my_name, - &(endpoint->proc_ompi->proc_name)) == ORTE_EQUAL) { + if(orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + ORTE_PROC_MY_NAME, + &(endpoint->proc_ompi->proc_name)) == OPAL_EQUAL) { mca_pml_dr.my_rank = idx; } endpoint->local = endpoint->dst = idx; diff --git a/ompi/mca/pml/dr/pml_dr_endpoint.c b/ompi/mca/pml/dr/pml_dr_endpoint.c index 
ae954468fc..651811cea2 100644 --- a/ompi/mca/pml/dr/pml_dr_endpoint.c +++ b/ompi/mca/pml/dr/pml_dr_endpoint.c @@ -19,7 +19,6 @@ #include "ompi_config.h" #include "pml_dr.h" #include "pml_dr_endpoint.h" -#include "orte/mca/ns/ns.h" diff --git a/ompi/mca/pml/dr/pml_dr_recvfrag.c b/ompi/mca/pml/dr/pml_dr_recvfrag.c index 9f79baa62c..b59a728dcf 100644 --- a/ompi/mca/pml/dr/pml_dr_recvfrag.c +++ b/ompi/mca/pml/dr/pml_dr_recvfrag.c @@ -208,7 +208,7 @@ void mca_pml_dr_recv_frag_callback( return; } else { OPAL_OUTPUT((0, "%s:%d: the world as we know it is bad\n", __FILE__, __LINE__)); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } } comm = (mca_pml_dr_comm_t*)ompi_comm->c_pml_comm; @@ -241,7 +241,7 @@ void mca_pml_dr_recv_frag_callback( ompi_comm = ompi_comm_lookup(hdr->hdr_common.hdr_ctx); if(NULL == ompi_comm) { OPAL_OUTPUT((0, "%s:%d: the world as we know it is bad\n", __FILE__, __LINE__)); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } comm = (mca_pml_dr_comm_t*)ompi_comm->c_pml_comm; assert(hdr->hdr_common.hdr_src < opal_pointer_array_get_size(&comm->sparse_procs)); @@ -290,7 +290,7 @@ void mca_pml_dr_recv_frag_callback( ompi_comm = ompi_comm_lookup(hdr->hdr_common.hdr_ctx); if(NULL == ompi_comm) { MCA_PML_DR_DEBUG(0,(0, "%s:%d: the world as we know it is bad\n", __FILE__, __LINE__)); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } comm = (mca_pml_dr_comm_t*)ompi_comm->c_pml_comm; assert(hdr->hdr_common.hdr_src < opal_pointer_array_get_size(&comm->sparse_procs)); diff --git a/ompi/mca/pml/dr/pml_dr_sendreq.c b/ompi/mca/pml/dr/pml_dr_sendreq.c index 1167fcd591..8ffe014b08 100644 --- a/ompi/mca/pml/dr/pml_dr_sendreq.c +++ b/ompi/mca/pml/dr/pml_dr_sendreq.c @@ -121,7 +121,7 @@ static void mca_pml_dr_error_completion( mca_bml.bml_del_btl(btl); break; default: - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); break; } diff --git a/ompi/mca/pml/dr/pml_dr_vfrag.c b/ompi/mca/pml/dr/pml_dr_vfrag.c index 812c43aba7..3620716bf0 100644 --- 
a/ompi/mca/pml/dr/pml_dr_vfrag.c +++ b/ompi/mca/pml/dr/pml_dr_vfrag.c @@ -168,7 +168,7 @@ void mca_pml_dr_vfrag_reset(mca_pml_dr_vfrag_t* vfrag) if(mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->bml_endpoint->btl_eager) == 0 || mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->bml_endpoint->btl_eager) == 0) { opal_output(0, "%s:%d:%s: no path to peer", __FILE__, __LINE__, __func__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } if(vfrag->vf_offset == 0) { vfrag->bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->bml_endpoint->btl_eager); diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index 9bb72f8af1..ca499920d9 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -41,9 +41,7 @@ #include "ompi/runtime/ompi_cr.h" #include "ompi/runtime/ompi_module_exchange.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/gpr/gpr.h" mca_pml_ob1_t mca_pml_ob1 = { { @@ -450,7 +448,7 @@ void mca_pml_ob1_process_pending_rdma(void) void mca_pml_ob1_error_handler( struct mca_btl_base_module_t* btl, int32_t flags) { - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } int mca_pml_ob1_ft_event( int state ) @@ -458,7 +456,6 @@ int mca_pml_ob1_ft_event( int state ) ompi_proc_t** procs = NULL; size_t num_procs; int ret, p; - orte_buffer_t mdx_buf, rbuf; if(OPAL_CRS_CHECKPOINT == state) { ; @@ -469,10 +466,6 @@ int mca_pml_ob1_ft_event( int state ) else if(OPAL_CRS_RESTART == state) { /* * Get a list of processes - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. For our - * needs here we only need to fix up the modex, bml and pml - * references. 
*/ procs = ompi_proc_all(&num_procs); if(NULL == procs) { @@ -484,43 +477,26 @@ int mca_pml_ob1_ft_event( int state ) */ opal_output_verbose(10, ompi_cr_output, "pml:ob1: ft_event(Restart): Restart Modex information"); - if (OMPI_SUCCESS != (ret = ompi_modex_finalize())) { + if (OMPI_SUCCESS != (ret = orte_grpcomm.purge_proc_attrs())) { opal_output(0, - "pml:ob1: ft_event(Restart): modex_finalize Failed %d", + "pml:ob1: ft_event(Restart): purge_modex Failed %d", ret); return ret; } /* - * Make sure the modex is NULL so it can be re-initalized + * Refresh the proc structure, and publish our proc info in the modex. + * NOTE: Do *not* call ompi_proc_finalize as there are many places in + * the code that point to individual procs in this structure. For our + * needs here we only need to fix up the modex, bml and pml + * references. */ - for(p = 0; p < (int)num_procs; ++p) { - if( NULL != procs[p]->proc_modex ) { - OBJ_RELEASE(procs[p]->proc_modex); - procs[p]->proc_modex = NULL; - } - } - - /* - * Init the modex structures - */ - if (OMPI_SUCCESS != (ret = ompi_modex_init())) { + if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { opal_output(0, - "pml:ob1: ft_event(Restart): modex_init Failed %d", + "pml:ob1: ft_event(Restart): proc_refresh Failed %d", ret); return ret; } - - /* - * Load back up the hostname/arch information into the modex - */ - if (OMPI_SUCCESS != (ret = ompi_proc_publish_info())) { - opal_output(0, - "pml:ob1: ft_event(Restart): proc_init Failed %d", - ret); - return ret; - } - } else if(OPAL_CRS_TERM == state ) { ; @@ -547,39 +523,16 @@ int mca_pml_ob1_ft_event( int state ) } else if(OPAL_CRS_RESTART == state) { /* - * Exchange the modex information once again + * Exchange the modex information once again. + * BTLs will have republished their modex information.
*/ - OBJ_CONSTRUCT(&mdx_buf, orte_buffer_t); - if (OMPI_SUCCESS != (ret = ompi_modex_get_my_buffer(&mdx_buf))) { + if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(NULL))) { opal_output(0, - "pml:ob1: ft_event(Restart): Failed ompi_modex_get_my_buffer() = %d", + "pml:ob1: ft_event(Restart): Failed orte_grpcomm.modex() = %d", ret); return ret; } - /* - * Do the allgather exchange of information - */ - OBJ_CONSTRUCT(&rbuf, orte_buffer_t); - if (OMPI_SUCCESS != (ret = orte_grpcomm.allgather(&mdx_buf, &rbuf))) { - opal_output(0, - "pml:ob1: ft_event(Restart): Failed orte_grpcomm.allgather() = %d", - ret); - return ret; - } - OBJ_DESTRUCT(&mdx_buf); - - /* - * Process the modex data into the proc structures - */ - if (OMPI_SUCCESS != (ret = ompi_modex_process_data(&rbuf))) { - opal_output(0, - "pml:ob1: ft_event(Restart): Failed ompi_modex_process_data() = %d", - ret); - return ret; - } - OBJ_DESTRUCT(&rbuf); - /* * Fill in remote proc information */ @@ -592,7 +545,7 @@ int mca_pml_ob1_ft_event( int state ) /* * Startup the PML stack now that the modex is running again - * Add the new procs + * Add the new procs (BTLs redo modex recv's) */ if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) { opal_output(0, "pml:ob1: fr_event(Restart): Failed in add_procs (%d)", ret); diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.c b/ompi/mca/pml/ob1/pml_ob1_rdma.c index 4e64a8729e..3d109ca713 100644 --- a/ompi/mca/pml/ob1/pml_ob1_rdma.c +++ b/ompi/mca/pml/ob1/pml_ob1_rdma.c @@ -25,7 +25,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" #include "ompi/mca/bml/bml.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" #include "ompi/mca/mpool/mpool.h" #include "pml_ob1.h" #include "pml_ob1_rdma.h" diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 30da9ce5ee..cf98f0d084 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -323,7 +323,7 @@ static void 
mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ ORTE_ERROR_LOG(status); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } mca_pml_ob1_send_fin(recvreq->req_recv.req_base.req_proc, @@ -390,7 +390,7 @@ int mca_pml_ob1_recv_request_get_frag( mca_pml_ob1_rdma_frag_t* frag ) return OMPI_ERR_OUT_OF_RESOURCE; } else { ORTE_ERROR_LOG(rc); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } } @@ -420,7 +420,7 @@ static void mca_pml_ob1_recv_request_rget( if( OPAL_UNLIKELY(NULL == frag) ) { /* GLB - FIX */ ORTE_ERROR_LOG(rc); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* lookup bml datastructures */ @@ -442,7 +442,7 @@ static void mca_pml_ob1_recv_request_rget( frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } frag->rdma_hdr.hdr_rget = *hdr; frag->rdma_req = recvreq; diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 52f521ba53..80652faeb4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -158,7 +158,7 @@ mca_pml_ob1_match_completion_free( struct mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* signal request completion */ @@ -191,7 +191,7 @@ mca_pml_ob1_rndv_completion( mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* count bytes of user data actually delivered. 
As the rndv completion only @@ -278,7 +278,7 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* count bytes of user data actually delivered */ @@ -997,7 +997,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ ORTE_ERROR_LOG(status); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc, @@ -1079,7 +1079,7 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag ) } else { /* TSW - FIX */ ORTE_ERROR_LOG(rc); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } } return OMPI_SUCCESS; @@ -1110,7 +1110,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq, if( OPAL_UNLIKELY(NULL == frag) ) { /* TSW - FIX */ ORTE_ERROR_LOG(rc); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* setup fragment */ diff --git a/ompi/mca/pml/v/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c b/ompi/mca/pml/v/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c index a8658a9302..82f6dfdc69 100644 --- a/ompi/mca/pml/v/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c +++ b/ompi/mca/pml/v/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c @@ -22,6 +22,7 @@ #include "ompi/datatype/datatype_memcpy.h" #include +#include "orte/util/proc_info.h" #define sb mca_vprotocol_pessimist.sender_based diff --git a/orte/mca/gpr/Makefile.am b/ompi/mca/pubsub/Makefile.am similarity index 72% rename from orte/mca/gpr/Makefile.am rename to ompi/mca/pubsub/Makefile.am index d4f917d16d..bd10632353 100644 --- a/orte/mca/gpr/Makefile.am +++ b/ompi/mca/pubsub/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# Copyright (c) 2004-2007 The Trustees of Indiana 
University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University @@ -17,22 +17,22 @@ # # main library setup -noinst_LTLIBRARIES = libmca_gpr.la -libmca_gpr_la_SOURCES = +noinst_LTLIBRARIES = libmca_pubsub.la +libmca_pubsub_la_SOURCES = # header setup -nobase_orte_HEADERS = +nobase_ompi_HEADERS = # local files -headers = gpr.h gpr_types.h -libmca_gpr_la_SOURCES += $(headers) +headers = pubsub.h +libmca_pubsub_la_SOURCES += $(headers) # Conditionally install the header files if WANT_INSTALL_HEADERS -nobase_orte_HEADERS += $(headers) -ortedir = $(includedir)/openmpi/orte/mca/gpr +nobase_ompi_HEADERS += $(headers) +ompidir = $(includedir)/openmpi/ompi/mca/pubsub else -ortedir = $(includedir) +ompidir = $(includedir) endif include base/Makefile.am diff --git a/orte/mca/schema/base/Makefile.am b/ompi/mca/pubsub/base/Makefile.am similarity index 74% rename from orte/mca/schema/base/Makefile.am rename to ompi/mca/pubsub/base/Makefile.am index 973b2cf08d..b83f3d2c1e 100644 --- a/orte/mca/schema/base/Makefile.am +++ b/ompi/mca/pubsub/base/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. 
# Copyright (c) 2004-2005 The University of Tennessee and The University @@ -19,8 +19,8 @@ headers += \ base/base.h -libmca_schema_la_SOURCES += \ - base/schema_base_open.c \ - base/schema_base_close.c \ - base/schema_base_select.c \ - base/schema_base_fns.c +libmca_pubsub_la_SOURCES += \ + base/pubsub_base_open.c \ + base/pubsub_base_close.c \ + base/pubsub_base_select.c + diff --git a/ompi/mca/pubsub/base/base.h b/ompi/mca/pubsub/base/base.h new file mode 100644 index 0000000000..e4b00627bf --- /dev/null +++ b/ompi/mca/pubsub/base/base.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef OMPI_MCA_PUBSUB_BASE_H +#define OMPI_MCA_PUBSUB_BASE_H + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include "ompi/mca/pubsub/pubsub.h" + +/* + * Global functions for MCA overall PUBSUB + */ + +BEGIN_C_DECLS + +/** + * Initialize the PUBSUB MCA framework + * + * @retval OMPI_SUCCESS Upon success + * @retval OMPI_ERROR Upon failures + * + * This function is invoked during ompi_init(); + */ +OMPI_DECLSPEC int ompi_pubsub_base_open(void); + +/** + * Select an available component. 
+ * + * @retval OMPI_SUCCESS Upon Success + * @retval OMPI_NOT_FOUND If no component can be selected + * @retval OMPI_ERROR Upon other failure + * + */ +OMPI_DECLSPEC int ompi_pubsub_base_select(void); + +/** + * Finalize the PUBSUB MCA framework + * + * @retval OMPI_SUCCESS Upon success + * @retval OMPI_ERROR Upon failures + * + * This function is invoked during ompi_finalize(); + */ +OMPI_DECLSPEC int ompi_pubsub_base_close(void); + + +/* useful globals */ +OMPI_DECLSPEC extern int ompi_pubsub_base_output; +OMPI_DECLSPEC extern opal_list_t ompi_pubsub_base_components_available; +OMPI_DECLSPEC extern ompi_pubsub_base_component_t ompi_pubsub_base_selected_component; +OMPI_DECLSPEC extern ompi_pubsub_base_module_t ompi_pubsub; + +END_C_DECLS + +#endif /* OMPI_MCA_PUBSUB_BASE_H */ diff --git a/ompi/mca/pubsub/base/pubsub_base_close.c b/ompi/mca/pubsub/base/pubsub_base_close.c new file mode 100644 index 0000000000..182468f08e --- /dev/null +++ b/ompi/mca/pubsub/base/pubsub_base_close.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "ompi/mca/pubsub/pubsub.h" +#include "ompi/mca/pubsub/base/base.h" + +int ompi_pubsub_base_close(void) +{ + /* Close the selected component */ + if( NULL != ompi_pubsub.finalize ) { + ompi_pubsub.finalize(); + } + + /* Close all available modules that are open */ + mca_base_components_close(ompi_pubsub_base_output, + &ompi_pubsub_base_components_available, + NULL); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/pubsub/base/pubsub_base_open.c b/ompi/mca/pubsub/base/pubsub_base_open.c new file mode 100644 index 0000000000..ad61a8b394 --- /dev/null +++ b/ompi/mca/pubsub/base/pubsub_base_open.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "ompi/mca/pubsub/pubsub.h" +#include "ompi/mca/pubsub/base/base.h" + +#include "ompi/mca/pubsub/base/static-components.h" + +/* + * Globals + */ +OMPI_DECLSPEC int ompi_pubsub_base_output = -1; +OMPI_DECLSPEC ompi_pubsub_base_module_t ompi_pubsub; +opal_list_t ompi_pubsub_base_components_available; +ompi_pubsub_base_component_t ompi_pubsub_base_selected_component; + +/** + * Function for finding and opening either all MCA components, + * or the one that was specifically requested via a MCA parameter. + */ +int ompi_pubsub_base_open(void) +{ + /* Debugging/Verbose output */ + ompi_pubsub_base_output = opal_output_open(NULL); + + /* Open up all available components */ + if (OPAL_SUCCESS != + mca_base_components_open("pubsub", + ompi_pubsub_base_output, + mca_pubsub_base_static_components, + &ompi_pubsub_base_components_available, + true)) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/pubsub/base/pubsub_base_select.c b/ompi/mca/pubsub/base/pubsub_base_select.c new file mode 100644 index 0000000000..e13e92cb2d --- /dev/null +++ b/ompi/mca/pubsub/base/pubsub_base_select.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/base/mca_base_component_repository.h" + +#include "ompi/mca/pubsub/pubsub.h" +#include "ompi/mca/pubsub/base/base.h" + + +int ompi_pubsub_base_select(void) +{ + opal_list_item_t *item; + mca_base_component_list_item_t *cli; + ompi_pubsub_base_component_t *component, *best_component = NULL; + ompi_pubsub_base_module_t *module, *best_module = NULL; + int priority, best_priority = -1; + int rc; + + /* Query all the opened components and see if they want to run */ + + for (item = opal_list_get_first(&ompi_pubsub_base_components_available); + opal_list_get_end(&ompi_pubsub_base_components_available) != item; + item = opal_list_get_next(item)) { + cli = (mca_base_component_list_item_t *) item; + component = (ompi_pubsub_base_component_t *) cli->cli_component; + + OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: querying component %s", + component->pubsub_version.mca_component_name)); + + /* Call the component's init function and see if it wants to be + selected */ + + module = component->pubsub_init(&priority); + + /* If we got a non-NULL module back, then the component wants + to be considered for selection */ + + if (NULL != module) { + /* If this is the best one, save it */ + if (priority > best_priority) { + + /* If there was a previous best one, finalize */ + if (NULL != best_module) { + + OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: found better component - finalizing component %s", + best_component->pubsub_version.mca_component_name)); + + best_module->finalize(); + } + + /* Save the new best one */ + best_module = module; + best_component = component; + + /* update the best priority */ + best_priority = priority; + } else { + + 
OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: component %s does did not win the election", + component->pubsub_version.mca_component_name)); + + if (NULL == module->finalize) { + opal_output(ompi_pubsub_base_output, + "It appears you are the victim of a stale library - please delete your installation lib directory and reinstall"); + } else { + module->finalize(); + } + } + } + } + + /* If we didn't find one to select, barf */ + + if (NULL == best_component) { + return OMPI_ERROR; + } + + OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: component %s was selected", + best_component->pubsub_version.mca_component_name)); + + /* We have happiness -- save the component and module for later + usage */ + + ompi_pubsub = *best_module; + ompi_pubsub_base_selected_component = *best_component; + + /* unload all components that were not selected */ + item = opal_list_get_first(&ompi_pubsub_base_components_available); + while(item != opal_list_get_end(&ompi_pubsub_base_components_available)) { + opal_list_item_t* next = opal_list_get_next(item); + ompi_pubsub_base_component_t* component; + cli = (mca_base_component_list_item_t *) item; + component = (ompi_pubsub_base_component_t *) cli->cli_component; + if(component != best_component) { + + OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: module %s unloaded", + component->pubsub_version.mca_component_name)); + + mca_base_component_repository_release((mca_base_component_t *) component); + opal_list_remove_item(&ompi_pubsub_base_components_available, item); + OBJ_RELEASE(item); + } + item = next; + } + + /* init the selected module */ + if (NULL != ompi_pubsub.init) { + if (OMPI_SUCCESS != (rc = ompi_pubsub.init())) { + return rc; + } + } + return OMPI_SUCCESS; +} diff --git a/ompi/mca/pubsub/orte/Makefile.am b/ompi/mca/pubsub/orte/Makefile.am new file mode 100644 index 0000000000..4803e735bb --- /dev/null +++ 
b/ompi/mca/pubsub/orte/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +dist_pkgdata_DATA = help-ompi-pubsub-orte.txt + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if OMPI_BUILD_pubsub_orte_DSO +component_noinst = +component_install = mca_pubsub_orte.la +else +component_noinst = libmca_pubsub_orte.la +component_install = +endif + +local_sources = \ + pubsub_orte.c \ + pubsub_orte.h \ + pubsub_orte_component.c + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pubsub_orte_la_SOURCES = $(local_sources) +mca_pubsub_orte_la_LDFLAGS = -module -avoid-version $(pubsub_orte_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pubsub_orte_la_SOURCES = $(local_sources) +libmca_pubsub_orte_la_LIBADD = $(pubsub_orte_LIBS) +libmca_pubsub_orte_la_LDFLAGS = -module -avoid-version $(pubsub_orte_LDFLAGS) + diff --git a/orte/mca/errmgr/hnp/configure.params b/ompi/mca/pubsub/orte/configure.params similarity index 100% rename from orte/mca/errmgr/hnp/configure.params rename to ompi/mca/pubsub/orte/configure.params diff --git a/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt b/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt new file mode 100644 index 0000000000..b00908c65f --- /dev/null +++ b/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt @@ -0,0 +1,43 @@ +# -*- text -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open MPI. +# +[pubsub-orte:no-server] +Process rank %ld attempted to %s a global ompi_server that +could not be contacted. This is typically caused by either not +specifying the contact info for the server, or by the server not +currently executing. If you did specify the contact info for a +server, please check to see that the server is running and start +it again (or have your sys admin start it) if it isn't. + +[pubsub-orte:unknown-order] +Process rank %ld attempted to lookup a value but provided an +unrecognized order parameter. Order parameters are used to tell Open +MPI if it should first look for the requested value locally (i.e., from +the current job) or from a global ompi_server. Accepted order +parameters are "local" and "global", respectively. + +[pubsub-orte:too-many-orders] +Process rank %ld attempted to lookup a value but provided too many +order parameters (%ld found). Order parameters are used to tell +Open MPI if it should first look for the requested value locally +(i.e., from the current job) or from a global ompi_server. Accepted +order parameters are "local" and "global", respectively, and each can +only be specified once. diff --git a/ompi/mca/pubsub/orte/pubsub_orte.c b/ompi/mca/pubsub/orte/pubsub_orte.c new file mode 100644 index 0000000000..7ad1ec7341 --- /dev/null +++ b/ompi/mca/pubsub/orte/pubsub_orte.c @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include +#include +#include + +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/util/opal_getcwd.h" + +#include "opal/dss/dss.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/grpcomm/grpcomm.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/routed/routed.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_data_server.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/proc/proc.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/info/info.h" +#include "ompi/runtime/ompi_module_exchange.h" + +#include "ompi/mca/pubsub/base/base.h" +#include "pubsub_orte.h" + +/* Establish contact with the server + * + * NOTE: we do not do this automatically during init to avoid + * forcing every process to pay the time penalty during MPI_Init + * when only a few, if any, will ever call pub/lookup/unpub. In + * addition, those that -do- call these functions may well only + * use local (as opposed to global) storage, and hence will have + * no need to talk to the server, even though a sys admin may + * have set one up. 
So we do a lazy setup of the server contact + * info - it only gets setup the first time we call a function + * that wants to talk to the global server + */ +static bool server_setup=false; + +static void setup_server(void) +{ + opal_buffer_t buf; + orte_rml_cmd_flag_t command=ORTE_RML_UPDATE_CMD; + int rc; + + if (NULL == mca_pubsub_orte_component.server_uri) { + /* if the contact info for the server is NULL, then there + * is nothing to do + */ + server_setup = true; + return; + } + + /* setup the route to the server using the + * selected routed component. This allows us + * to tell the local daemon how to reach the + * server, so we can still only have one connection + * open! To do this, we need to insert the server's + * uri into a buffer + */ + OBJ_CONSTRUCT(&buf, opal_buffer_t); + opal_dss.pack(&buf, &command, 1, ORTE_RML_CMD); + opal_dss.pack(&buf, &mca_pubsub_orte_component.server_uri, 1, OPAL_STRING); + if (ORTE_SUCCESS != (rc = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, &buf))) { + ORTE_ERROR_LOG(rc); + server_setup = true; + return; + } + OBJ_DESTRUCT(&buf); + + /* extract the server's name */ + orte_rml_base_parse_uris(mca_pubsub_orte_component.server_uri, &mca_pubsub_orte_component.server, NULL); + + /* flag the server as found */ + mca_pubsub_orte_component.server_found = true; + + /* flag setup as completed */ + server_setup = true; +} + +/* + * Init the module + */ +static int init(void) +{ + return OMPI_SUCCESS; +} + +/* + * publish the port_name for the specified service_name. This will + * be published under our process name, so only we will be allowed + * to remove it later. 
+ */
+static int publish ( char *service_name, ompi_info_t *info, char *port_name )
+{
+    int rc, ret, flag;
+    bool global_scope = false;
+    orte_process_name_t *info_host;
+    opal_buffer_t buf;
+    orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_PUBLISH;
+    orte_std_cntr_t cnt;
+
+    /* global_scope keeps its "false" default if the key is absent */
+    ompi_info_get_bool(info, "ompi_global_scope", &global_scope, &flag);
+
+    if (!global_scope) {
+        /* if the scope is not global, then store the value on the HNP */
+        info_host = ORTE_PROC_MY_HNP;
+    } else {
+        /* has the server been setup yet? */
+        if (!server_setup) {
+            setup_server();
+        }
+        /* store the value on the global ompi_server, but error
+         * if that server wasn't contacted
+         */
+        if (!mca_pubsub_orte_component.server_found) {
+            opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server",
+                           true, (long)ORTE_PROC_MY_NAME->vpid, "publish to");
+            return OMPI_ERR_NOT_FOUND;
+        }
+        info_host = &mca_pubsub_orte_component.server;
+    }
+
+    /* construct the buffer */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+
+    /* pack the publish command */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* pack the service name */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &service_name, 1, OPAL_STRING))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* pack the port name */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &port_name, 1, OPAL_STRING))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* send the data */
+    if (0 > (rc = orte_rml.send_buffer(info_host, &buf, ORTE_RML_TAG_DATA_SERVER, 0))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    OBJ_DESTRUCT(&buf);
+
+    /* get the answer - the buffer is re-constructed so that the
+     * single OBJ_DESTRUCT at CLEANUP is valid on every path
+     */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+    if (0 > (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, ORTE_RML_TAG_DATA_CLIENT, 0))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* unpack the result - the status reported by the data server
+     * becomes our return code
+     */
+    cnt = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &ret, &cnt, OPAL_INT))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    rc = ret;
+
+
+CLEANUP:
+    OBJ_DESTRUCT(&buf);
+
+    return rc;
+}
+
+/* scopes that can appear in the lookup order */
+enum { NONE, LOCAL, GLOBAL };
+
+/*
+ * lookup the port published under service_name.
+ *
+ * Returns the port name string unpacked from the data server's
+ * reply (presumably to be released by the caller - confirm against
+ * MPI_Lookup_name), or NULL if the name was not found in any of the
+ * requested scopes or an error occurred.
+ */
+static char* lookup ( char *service_name, ompi_info_t *info )
+{
+    orte_process_name_t *info_host;
+    opal_buffer_t buf;
+    orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_LOOKUP;
+    orte_std_cntr_t cnt=0;
+    char *port_name=NULL;
+    int ret, rc, flag, i;
+    char value[256], **tokens, *ptr;
+    int lookup[2] = { LOCAL, GLOBAL };
+    size_t num_tokens;
+
+    /* Look in the MPI_Info (ompi_info_t*) for the key
+     * "ompi_lookup_order".  Acceptable values are:
+     *
+     * - "local" -- only check the local scope
+     * - "global" -- only check the global scope
+     * - "local,global" -- check the local scope first, then check the
+     *   global scope
+     * - "global,local" -- check the global scope first, then check the
+     *   local scope
+     *
+     * Give a little leeway in terms of whitespace in the value.
+     *
+     * The lookup[2] array will contain the results: lookup[0] is the
+     * first scope to check, lookup[1] is the 2nd.  Either value may
+     * be NONE, LOCAL, or GLOBAL.  If both are NONE, clearly that's an
+     * error.  :-)
+     */
+    ompi_info_get(info, "ompi_lookup_order", sizeof(value) - 1, value, &flag);
+    if (flag) {
+        ptr = &value[0];
+        /* check the bound before dereferencing, and hand isspace()
+         * an unsigned char as <ctype.h> requires
+         */
+        while ((ptr - value) < (int)sizeof(value) &&
+               isspace((unsigned char)*ptr)) {
+            ++ptr;
+        }
+        if (ptr - value < (int)sizeof(value)) {
+            tokens = opal_argv_split(ptr, ',');
+            if (NULL != tokens) {
+                if ((num_tokens = opal_argv_count(tokens)) > 2) {
+                    /* too many values in the comma-delimited list */
+                    opal_show_help("help-ompi-pubsub-orte.txt",
+                                   "pubsub-orte:too-many-orders",
+                                   true, (long)ORTE_PROC_MY_NAME->vpid,
+                                   (long)num_tokens);
+                    opal_argv_free(tokens);
+                    return NULL;
+                }
+                for (i = 0; i < 2; ++i) {
+                    /* only index entries that exist - never read
+                     * beyond the NULL that terminates the list
+                     */
+                    if ((size_t)i < num_tokens) {
+                        if (0 == strcasecmp(tokens[i], "local")) {
+                            lookup[i] = LOCAL;
+                        } else if (0 == strcasecmp(tokens[i], "global")) {
+                            lookup[i] = GLOBAL;
+                        } else {
+                            /* unrecognized value -- that's an error */
+                            opal_show_help("help-ompi-pubsub-orte.txt",
+                                           "pubsub-orte:unknown-order",
+                                           true, (long)ORTE_PROC_MY_NAME->vpid);
+                            opal_argv_free(tokens);
+                            return NULL;
+                        }
+                    } else {
+                        lookup[i] = NONE;
+                    }
+                }
+                opal_argv_free(tokens);
+            }
+        }
+    }
+
+    /* check for error situations */
+
+    if (NONE == lookup[0]) {
+        /* if the user provided an info key, then we at least must
+         * be given one place to look
+         */
+        opal_show_help("help-ompi-pubsub-orte.txt",
+                       "pubsub-orte:unknown-order",
+                       true, (long)ORTE_PROC_MY_NAME->vpid);
+        return NULL;
+    }
+
+    if (GLOBAL == lookup[0]) {
+        /* has the server been setup yet? */
+        if (!server_setup) {
+            setup_server();
+        }
+
+        if (!mca_pubsub_orte_component.server_found) {
+            /* if we were told to look global first and no server is
+             * present, then that is an error
+             */
+            opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server",
+                           true, (long)ORTE_PROC_MY_NAME->vpid, "lookup from");
+            return NULL;
+        }
+    }
+
+    /* go find the value */
+    for (i=0; i < 2; i++) {
+        if (LOCAL == lookup[i]) {
+            /* if the scope is local, then lookup the value on the HNP */
+            info_host = ORTE_PROC_MY_HNP;
+        } else if (GLOBAL == lookup[i]) {
+            /* has the server been setup yet? */
+            if (!server_setup) {
+                setup_server();
+            }
+            /* lookup the value on the global ompi_server, but error
+             * if that server wasn't contacted
+             */
+            if (!mca_pubsub_orte_component.server_found) {
+                opal_show_help("help-ompi-pubsub-orte.txt",
+                               "pubsub-orte:no-server",
+                               true, (long)ORTE_PROC_MY_NAME->vpid,
+                               "lookup from");
+                return NULL;
+            }
+            info_host = &mca_pubsub_orte_component.server;
+        } else if (NONE == lookup[i]) {
+            /* lookup[0] was verified above, so this is the second
+             * slot of a single-scope request: there is nowhere else
+             * to look, and that is "not found" rather than a bad
+             * lookup order
+             */
+            break;
+        } else {
+            /* unknown host! */
+            opal_show_help("help-ompi-pubsub-orte.txt",
+                           "pubsub-orte:unknown-order",
+                           true, (long)ORTE_PROC_MY_NAME->vpid);
+            return NULL;
+        }
+
+        /* go look it up */
+        /* construct the buffer */
+        OBJ_CONSTRUCT(&buf, opal_buffer_t);
+
+        /* pack the lookup command */
+        if (ORTE_SUCCESS != (ret = opal_dss.pack(&buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+
+        /* pack the service name */
+        if (ORTE_SUCCESS != (ret = opal_dss.pack(&buf, &service_name, 1, OPAL_STRING))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+
+        /* send the cmd */
+        if (0 > (ret = orte_rml.send_buffer(info_host, &buf, ORTE_RML_TAG_DATA_SERVER, 0))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+        OBJ_DESTRUCT(&buf);
+
+        /* get the answer */
+        OBJ_CONSTRUCT(&buf, opal_buffer_t);
+        if (0 > (ret = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, ORTE_RML_TAG_DATA_CLIENT, 0))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+
+        /* unpack the return code */
+        cnt = 1;
+        if (ORTE_SUCCESS != (ret = opal_dss.unpack(&buf, &rc, &cnt, OPAL_INT))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+
+        if (ORTE_SUCCESS == rc) {
+            /* the server was able to lookup the port - unpack the port name */
+            cnt=1;
+            if (ORTE_SUCCESS != (ret = opal_dss.unpack(&buf, &port_name, &cnt, OPAL_STRING))) {
+                ORTE_ERROR_LOG(ret);
+                goto CLEANUP;
+            }
+
+            if (NULL != port_name) {
+                /* got an answer - return it */
+                OBJ_DESTRUCT(&buf);
+                return port_name;
+            }
+        }
+
+        /* if we didn't get a port_name, then continue */
+        OBJ_DESTRUCT(&buf);
+    }
+
+    /* only get here if every requested scope was tried and
+     * failed - since the buffer will already have been cleaned
+     * up, just return
+     */
+    return NULL;
+
+CLEANUP:
+    OBJ_DESTRUCT(&buf);
+
+    return NULL;
+
+}
+
+/*
+ * delete the entry. Only the process who has published
+ * the service_name has the right to remove this
+ * service - the server will verify and report the result
+ *
+ * Returns the status reported by the data server, an ORTE error
+ * code if the exchange with it failed, or OMPI_ERR_NOT_FOUND if
+ * global scope was requested but no server could be contacted.
+ */
+static int unpublish ( char *service_name, ompi_info_t *info )
+{
+    int rc, ret, flag;
+    /* must start out false, exactly as in publish(): the value is
+     * tested below even when the info key is absent
+     */
+    bool global_scope = false;
+    orte_process_name_t *info_host;
+    opal_buffer_t buf;
+    orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_UNPUBLISH;
+    orte_std_cntr_t cnt;
+
+    ompi_info_get_bool(info, "ompi_global_scope", &global_scope, &flag);
+
+    if (!global_scope) {
+        /* if the scope is not global, then unpublish the value from the HNP */
+        info_host = ORTE_PROC_MY_HNP;
+    } else {
+        /* has the server been setup yet? */
+        if (!server_setup) {
+            setup_server();
+        }
+        /* unpublish the value from the global ompi_server, but error
+         * if that server wasn't contacted
+         */
+        if (!mca_pubsub_orte_component.server_found) {
+            /* pass the same two arguments that publish() and
+             * lookup() supply for this help topic
+             */
+            opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server",
+                           true, (long)ORTE_PROC_MY_NAME->vpid, "unpublish from");
+            return OMPI_ERR_NOT_FOUND;
+        }
+        info_host = &mca_pubsub_orte_component.server;
+    }
+
+    /* construct the buffer */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+
+    /* pack the unpublish command */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* pack the service name */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &service_name, 1, OPAL_STRING))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* send the command */
+    if (0 > (rc = orte_rml.send_buffer(info_host, &buf, ORTE_RML_TAG_DATA_SERVER, 0))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    OBJ_DESTRUCT(&buf);
+
+    /* get the answer */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+    if (0 > (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, ORTE_RML_TAG_DATA_CLIENT, 0))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* unpack the result */
+    cnt = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &ret, &cnt, OPAL_INT))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    rc = ret;
+
+CLEANUP:
+    OBJ_DESTRUCT(&buf);
+
+    return rc;
+}
+
+
+/*
+ * finalize the module - there is nothing to release here
+ */
+static int finalize(void)
+{
+    return OMPI_SUCCESS;
+}
+
+/*
+ * instantiate the module - the entries must stay in the order in
+ * which the module structure declares them:
+ * init, publish, unpublish, lookup, finalize
+ */
+ompi_pubsub_base_module_t ompi_pubsub_orte_module = {
+    init,
+    publish,
+    unpublish,
+    lookup,
+    finalize
+};
+
+
diff --git a/ompi/mca/pubsub/orte/pubsub_orte.h b/ompi/mca/pubsub/orte/pubsub_orte.h
new file mode 100644
index 0000000000..8f4acdff0d
--- /dev/null
+++ b/ompi/mca/pubsub/orte/pubsub_orte.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
+ *                         reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef OMPI_PUBSUB_ORTE_H
+#define OMPI_PUBSUB_ORTE_H
+
+#include "ompi_config.h"
+
+#include "orte/types.h"
+
+#include "ompi/mca/pubsub/pubsub.h"
+
+BEGIN_C_DECLS
+
+/*
+ * Extend the pubsub component to hold some useful
+ * values for this component
+ */
+typedef struct {
+    /* the base pubsub component - kept first so that it can be
+     * reached as mca_pubsub_orte_component.super
+     */
+    ompi_pubsub_base_component_t super;
+    /* process name of the server, parsed out of server_uri by
+     * setup_server() in pubsub_orte.c
+     */
+    orte_process_name_t server;
+    /* contact info for the server, filled in from the "server" MCA
+     * parameter; NULL when none was given
+     */
+    char *server_uri;
+    /* set to true by setup_server() once the route to the server
+     * has been set up
+     */
+    bool server_found;
+} ompi_pubsub_orte_component_t;
+
+/* access to module */
+extern ompi_pubsub_base_module_t ompi_pubsub_orte_module;
+
+/* access to component so we can get to the locally
+ * global values
+ */
+OMPI_MODULE_DECLSPEC extern ompi_pubsub_orte_component_t mca_pubsub_orte_component;
+
+END_C_DECLS
+
+#endif /* OMPI_PUBSUB_ORTE_H */
diff --git a/ompi/mca/pubsub/orte/pubsub_orte_component.c b/ompi/mca/pubsub/orte/pubsub_orte_component.c
new file mode 100644
index 0000000000..a33263a8bc
--- /dev/null
+++ b/ompi/mca/pubsub/orte/pubsub_orte_component.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2007 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2006 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "ompi/constants.h"
+
+#include "pubsub_orte.h"
+
+static int pubsub_orte_component_open(void);
+static int pubsub_orte_component_close(void);
+static ompi_pubsub_base_module_t*
+pubsub_orte_component_init( int* priority );
+
+/* The one instance of this component.  Only the "super" member is
+ * initialized explicitly; server, server_uri and server_found are
+ * left to static zero-initialization.
+ */
+ompi_pubsub_orte_component_t mca_pubsub_orte_component = {
+    {
+        /* First, the mca_base_component_t struct containing meta
+           information about the component itself */
+
+        {
+            /* Indicate that we are a pubsub v1.0.0 component (which also implies
+               a specific MCA version) */
+
+            OMPI_PUBSUB_BASE_VERSION_1_0_0,
+
+            "orte", /* MCA component name */
+            OMPI_MAJOR_VERSION,  /* MCA component major version */
+            OMPI_MINOR_VERSION,  /* MCA component minor version */
+            OMPI_RELEASE_VERSION,  /* MCA component release version */
+            pubsub_orte_component_open,  /* component open */
+            pubsub_orte_component_close  /* component close */
+        },
+
+        /* Next the MCA v1.0.0 component meta data */
+
+        {
+            /* This component is checkpoint ready */
+            MCA_BASE_METADATA_PARAM_CHECKPOINT
+        },
+
+        pubsub_orte_component_init,  /* component init */
+    }
+};
+
+
+/* Component open: nothing to do, always succeeds. */
+int pubsub_orte_component_open(void)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Component close: release the server contact string, if any, and
+ * clear the pointer so that it cannot be freed twice.
+ */
+int pubsub_orte_component_close(void)
+{
+    if (NULL != mca_pubsub_orte_component.server_uri) {
+        free(mca_pubsub_orte_component.server_uri);
+        mca_pubsub_orte_component.server_uri = NULL;
+    }
+    return OMPI_SUCCESS;
+}
+
+/* Component init: registers the "server" string parameter (stored
+ * into server_uri), resets server_found, reports a priority of 50
+ * through *priority and returns the orte pubsub module.
+ */
+ompi_pubsub_base_module_t* pubsub_orte_component_init(int* priority)
+{
+    mca_base_component_t *comp = &mca_pubsub_orte_component.super.pubsub_version;
+
+    mca_base_param_reg_string(comp, "server",
+                              "Contact info for ompi_server for publish/subscribe operations",
+                              false, false, NULL,
+                              &mca_pubsub_orte_component.server_uri);
+
+    /* no contact has been attempted yet */
+    mca_pubsub_orte_component.server_found = false;
+
+    *priority = 50;
+
+    return &ompi_pubsub_orte_module;
+}
diff --git a/ompi/mca/pubsub/pubsub.h b/ompi/mca/pubsub/pubsub.h
new file mode 100644
index
0000000000..9e310bc757
--- /dev/null
+++ b/ompi/mca/pubsub/pubsub.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/**
+ * @file
+ *
+ * Publish/Subscribe (PUBSUB) Interface: publish, unpublish and
+ * lookup of named data items
+ *
+ */
+
+#ifndef OMPI_MCA_PUBSUB_H
+#define OMPI_MCA_PUBSUB_H
+
+#include "ompi_config.h"
+
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+
+#include "opal/class/opal_object.h"
+
+#include "ompi/info/info.h"
+#include "ompi/communicator/communicator.h"
+
+BEGIN_C_DECLS
+
+/*
+ * Initialize a module
+ */
+typedef int (*ompi_pubsub_base_module_init_fn_t)(void);
+
+/*
+ * Publish a data item: make "port" available under the name
+ * "service".  "info" carries options for the module.  Returns an
+ * int status code.
+ */
+typedef int (*ompi_pubsub_base_module_publish_fn_t)(char *service, ompi_info_t *info, char *port);
+
+/*
+ * Unpublish a data item: remove the entry stored under the name
+ * "service".  Returns an int status code.
+ */
+typedef int (*ompi_pubsub_base_module_unpublish_fn_t)(char *service, ompi_info_t *info);
+
+/*
+ * Lookup a data item: returns the string stored under the name
+ * "service" (the orte module returns NULL when nothing is found).
+ */
+typedef char* (*ompi_pubsub_base_module_lookup_fn_t)(char *service, ompi_info_t *info);
+
+/*
+ * Finalize a module
+ */
+typedef int (*ompi_pubsub_base_module_finalize_fn_t)(void);
+
+/**
+ * Structure for PUBSUB v1.0.0 modules.  Positional initializers of
+ * this structure must list the functions in this order.
+ */
+struct ompi_pubsub_base_module_1_0_0_t {
+    /** Initialization Function */
+    ompi_pubsub_base_module_init_fn_t       init;
+    /* Publish */
+    ompi_pubsub_base_module_publish_fn_t    publish;
+    /* Unpublish */
+    ompi_pubsub_base_module_unpublish_fn_t  unpublish;
+    /* Lookup */
+    ompi_pubsub_base_module_lookup_fn_t     lookup;
+    /* finalize */
+    ompi_pubsub_base_module_finalize_fn_t   finalize;
+};
+typedef struct ompi_pubsub_base_module_1_0_0_t ompi_pubsub_base_module_1_0_0_t;
+typedef struct ompi_pubsub_base_module_1_0_0_t ompi_pubsub_base_module_t;
+
+/* The pubsub module that the MPI name-service bindings call through
+ * (ompi_pubsub.publish / .unpublish / .lookup)
+ */
+OMPI_DECLSPEC extern ompi_pubsub_base_module_t ompi_pubsub;
+
+
+/* Component init function: reports the component's priority through
+ * *priority and returns the module it offers
+ */
+typedef struct ompi_pubsub_base_module_1_0_0_t*
+(*ompi_pubsub_base_component_init_fn_t)(int *priority);
+
+
+/**
+ * Structure for PUBSUB v1.0.0 components.
+ */
+struct ompi_pubsub_base_component_1_0_0_t {
+    /** MCA base component */
+    mca_base_component_t pubsub_version;
+    /** MCA base data */
+    mca_base_component_data_1_0_0_t pubsub_data;
+    /* component selection */
+    ompi_pubsub_base_component_init_fn_t pubsub_init;
+};
+typedef struct ompi_pubsub_base_component_1_0_0_t ompi_pubsub_base_component_1_0_0_t;
+typedef struct ompi_pubsub_base_component_1_0_0_t ompi_pubsub_base_component_t;
+
+/**
+ * Macro for use in components that are of type PUBSUB v1.0.0
+ */
+#define OMPI_PUBSUB_BASE_VERSION_1_0_0 \
+    /* PUBSUB v1.0 is chained to MCA v1.0 */ \
+    MCA_BASE_VERSION_1_0_0, \
+    /* PUBSUB v1.0 */ \
+    "pubsub", 1, 0, 0
+
+
+END_C_DECLS
+
+#endif /* OMPI_MCA_PUBSUB_H */
diff --git a/ompi/mpi/c/close_port.c b/ompi/mpi/c/close_port.c index ba067bf750..e6fba135b6 100644 --- a/ompi/mpi/c/close_port.c +++ b/ompi/mpi/c/close_port.c @@ -19,6 +19,8 @@ #include #include "ompi/mpi/c/bindings.h" +#include "ompi/mca/dpm/dpm.h" + #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Close_port = PMPI_Close_port @@ -33,6 +35,7 @@ static const char FUNC_NAME[] = "MPI_Close_port"; int MPI_Close_port(char *port_name) { + int ret; OPAL_CR_NOOP_PROGRESS(); @@ -44,13 +47,7 @@ int MPI_Close_port(char *port_name) FUNC_NAME); } - /* - * since the port_name is our own process_name_t structure, - * we do not have to close anything or free a pointer. - * This function is therefore just a dummy function - * and fully implemented. I love these type functions, - * we should have more of them :-). 
- */ - - return MPI_SUCCESS; + ret = ompi_dpm.close_port(port_name); + + OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_accept.c b/ompi/mpi/c/comm_accept.c index 438336665b..48619cfaf3 100644 --- a/ompi/mpi/c/comm_accept.c +++ b/ompi/mpi/c/comm_accept.c @@ -21,6 +21,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -93,10 +94,10 @@ int MPI_Comm_accept(char *port_name, MPI_Info info, int root, * The two leaders will figure this out later. However, we need the tag. */ if ( rank == root ) { - tmp_port = ompi_parse_port(port_name, &tag); + tmp_port = ompi_dpm.parse_port(port_name, &tag); free (tmp_port); } - rc = ompi_comm_connect_accept (comm, root, NULL, send_first, &newcomp, tag); + rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, &newcomp, tag); *newcomm = newcomp; OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME ); diff --git a/ompi/mpi/c/comm_connect.c b/ompi/mpi/c/comm_connect.c index 5889a8affc..63f79b4439 100644 --- a/ompi/mpi/c/comm_connect.c +++ b/ompi/mpi/c/comm_connect.c @@ -21,9 +21,13 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" -#include "orte/mca/ns/ns.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" +#include "orte/util/name_fns.h" +#include "opal/dss/dss.h" +#include "orte/runtime/orte_globals.h" + #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Comm_connect = PMPI_Comm_connect #endif @@ -41,7 +45,7 @@ int MPI_Comm_connect(char *port_name, MPI_Info info, int root, int rank, rc; int send_first=1; /* yes, we are the active part in this game */ ompi_communicator_t *newcomp=MPI_COMM_NULL; - orte_process_name_t *port_proc_name=NULL; + orte_process_name_t port_proc_name; char *tmp_port=NULL; orte_rml_tag_t tag; @@ -97,18 +101,18 @@ int MPI_Comm_connect(char *port_name, MPI_Info info, int root, * structure. 
*/ if ( rank == root ) { - tmp_port = ompi_parse_port (port_name, &tag); - if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_process_name(&port_proc_name, tmp_port))) { - return rc; + tmp_port = ompi_dpm.parse_port (port_name, &tag); + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_process_name(&port_proc_name, tmp_port))) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_PORT, FUNC_NAME); } - if ( NULL == port_proc_name ) { + if ( OPAL_EQUAL == opal_dss.compare(&port_proc_name, ORTE_NAME_INVALID, ORTE_NAME) ) { *newcomm = MPI_COMM_NULL; return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_PORT, FUNC_NAME); } free (tmp_port); } - rc = ompi_comm_connect_accept (comm, root, port_proc_name, send_first, + rc = ompi_dpm.connect_accept (comm, root, &port_proc_name, send_first, &newcomp, tag); *newcomm = newcomp; diff --git a/ompi/mpi/c/comm_disconnect.c b/ompi/mpi/c/comm_disconnect.c index 63969e1ec2..fa91669796 100644 --- a/ompi/mpi/c/comm_disconnect.c +++ b/ompi/mpi/c/comm_disconnect.c @@ -30,6 +30,9 @@ #include "ompi/mpi/c/profile/defines.h" #endif +#include "ompi/mca/dpm/dpm.h" + + static const char FUNC_NAME[] = "MPI_Comm_disconnect"; @@ -54,10 +57,7 @@ int MPI_Comm_disconnect(MPI_Comm *comm) OPAL_CR_ENTER_LIBRARY(); if ( OMPI_COMM_IS_DYNAMIC(*comm)) { - ompi_comm_disconnect_obj *dobj; - - dobj = ompi_comm_disconnect_init (*comm); - ompi_comm_disconnect_waitall(1, &dobj); + ompi_dpm.disconnect (*comm); } else { (*comm)->c_coll.coll_barrier(*comm, (*comm)->c_coll.coll_barrier_module); diff --git a/ompi/mpi/c/comm_join.c b/ompi/mpi/c/comm_join.c index 318d675f0c..047e295610 100644 --- a/ompi/mpi/c/comm_join.c +++ b/ompi/mpi/c/comm_join.c @@ -33,8 +33,10 @@ #endif #include "ompi/mpi/c/bindings.h" -#include "orte/mca/ns/ns.h" #include "ompi/proc/proc.h" +#include "ompi/mca/dpm/dpm.h" + +#include "orte/util/name_fns.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Comm_join = PMPI_Comm_join @@ -51,7 +53,8 @@ static int ompi_socket_recv (int fd, char 
*buf, int len ); int MPI_Comm_join(int fd, MPI_Comm *intercomm) { - int rc, tag=OMPI_COMM_JOIN_TAG; + int rc; + orte_rml_tag_t tag=OMPI_COMM_JOIN_TAG; size_t size; uint32_t len, rlen, llen, lrlen; int send_first=1; @@ -59,7 +62,7 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) ompi_proc_t **myproc=NULL; ompi_communicator_t *newcomp; - orte_process_name_t *port_proc_name=NULL; + orte_process_name_t port_proc_name; if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -76,7 +79,7 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) Need to determine somehow how to avoid a potential deadlock here. */ myproc = ompi_proc_self (&size); - if (ORTE_SUCCESS != (rc = orte_ns.get_proc_name_string (&name, &(myproc[0]->proc_name)))) { + if (ORTE_SUCCESS != (rc = orte_util_convert_process_name_to_string (&name, &(myproc[0]->proc_name)))) { OPAL_CR_EXIT_LIBRARY(); return rc; } @@ -100,17 +103,16 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) ompi_socket_send (fd, name, llen); ompi_socket_recv (fd, rname, lrlen); - if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_process_name(&port_proc_name, rname))) { + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_process_name(&port_proc_name, rname))) { OPAL_CR_EXIT_LIBRARY(); - return rc; + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_PORT, FUNC_NAME); } - rc = ompi_comm_connect_accept (MPI_COMM_SELF, 0, port_proc_name, - send_first, &newcomp, tag); + rc = ompi_dpm.connect_accept (MPI_COMM_SELF, 0, &port_proc_name, + send_first, &newcomp, tag); free ( name ); free ( rname); - free ( port_proc_name ); free ( myproc ); *intercomm = newcomp; diff --git a/ompi/mpi/c/comm_spawn.c b/ompi/mpi/c/comm_spawn.c index 6e28409c94..50700dc2fc 100644 --- a/ompi/mpi/c/comm_spawn.c +++ b/ompi/mpi/c/comm_spawn.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -22,6 +22,7 @@ #include "opal/util/show_help.h" #include "ompi/info/info.h" #include "ompi/mpi/c/bindings.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -39,13 +40,14 @@ int MPI_Comm_spawn(char *command, char **argv, int maxprocs, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *intercomm, int *array_of_errcodes) { - int rank, rc, i; - int send_first=0; /* we wait to be contacted */ + int rank, rc=OMPI_SUCCESS, i, flag; + bool send_first = false; /* we wait to be contacted */ ompi_communicator_t *newcomp=NULL; char port_name[MPI_MAX_PORT_NAME]; char *tmp_port; orte_rml_tag_t tag; - + bool non_mpi = false; + MEMCHECKER( memchecker_comm(comm); ); @@ -89,21 +91,37 @@ int MPI_Comm_spawn(char *command, char **argv, int maxprocs, MPI_Info info, } } + /* See if the info key "ompi_non_mpi" was set to true */ + ompi_info_get_bool(info, "ompi_non_mpi", &non_mpi, &flag); + OPAL_CR_ENTER_LIBRARY(); if ( rank == root ) { - /* Open a port. The port_name is passed as an environment variable - to the children. */ - ompi_open_port (port_name); - if (OMPI_SUCCESS != (rc = ompi_comm_start_processes (1, &command, &argv, &maxprocs, - &info, port_name))) { + if (non_mpi) { + /* no port is required since we won't be + * communicating with the children + */ + port_name[0] = '\0'; + } else { + /* Open a port. The port_name is passed as an environment + variable to the children. 
*/ + ompi_dpm.open_port (port_name); + } + if (OMPI_SUCCESS != (rc = ompi_dpm.spawn (1, &command, &argv, &maxprocs, + &info, port_name))) { goto error; } - tmp_port = ompi_parse_port (port_name, &tag); - free(tmp_port); + if (!non_mpi) { + tmp_port = ompi_dpm.parse_port (port_name, &tag); + free(tmp_port); + } + } + + if (non_mpi) { + newcomp = MPI_COMM_NULL; + } else { + rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, &newcomp, tag); } - - rc = ompi_comm_connect_accept (comm, root, NULL, send_first, &newcomp, tag); error: OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpi/c/comm_spawn_multiple.c b/ompi/mpi/c/comm_spawn_multiple.c index 7d1f8083d9..f8d618b7fe 100644 --- a/ompi/mpi/c/comm_spawn_multiple.c +++ b/ompi/mpi/c/comm_spawn_multiple.c @@ -22,6 +22,7 @@ #include "opal/util/show_help.h" #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -40,12 +41,13 @@ int MPI_Comm_spawn_multiple(int count, char **array_of_commands, char ***array_o int root, MPI_Comm comm, MPI_Comm *intercomm, int *array_of_errcodes) { - int i=0, rc=0, rank=0; + int i=0, rc=0, rank=0, flag; ompi_communicator_t *newcomp=NULL; int send_first=0; /* they are contacting us first */ char port_name[MPI_MAX_PORT_NAME]; char *tmp_port; orte_rml_tag_t tag = 0; + bool non_mpi, cumulative = false; MEMCHECKER( memchecker_comm(comm); @@ -77,6 +79,24 @@ int MPI_Comm_spawn_multiple(int count, char **array_of_commands, char ***array_o return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO, FUNC_NAME); } + /* If ompi_non_mpi is set to true on any info, it must be + set to true on all of them. Note that not setting + ompi_non_mpi is the same as setting it to false. 
*/ + ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, + &flag); + if (flag && 0 == i) { + /* If this is the first info, save its ompi_non_mpi value */ + cumulative = non_mpi; + } else if (!flag) { + non_mpi = false; + } + /* If this info's effective value doesn't agree with the + rest of them, error */ + if (cumulative != non_mpi) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, + MPI_ERR_INFO, + FUNC_NAME); + } } } @@ -106,22 +126,43 @@ int MPI_Comm_spawn_multiple(int count, char **array_of_commands, char ***array_o } } + if (MPI_INFO_NULL == array_of_info[0]) { + non_mpi = false; + } else { + ompi_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi, + &flag); + if (!flag) { + non_mpi = false; + } + } + OPAL_CR_ENTER_LIBRARY(); if ( rank == root ) { - /* Open a port. The port_name is passed as an environment variable - * to the children. */ - ompi_open_port (port_name); - if (OMPI_SUCCESS != (rc = ompi_comm_start_processes(count, array_of_commands, - array_of_argv, array_of_maxprocs, - array_of_info, port_name))) { + if (non_mpi) { + /* RHC: should this be better? */ + port_name[0] = '\0'; + } else { + /* Open a port. The port_name is passed as an environment + variable to the children. 
*/ + ompi_dpm.open_port (port_name); + } + if (OMPI_SUCCESS != (rc = ompi_dpm.spawn(count, array_of_commands, + array_of_argv, array_of_maxprocs, + array_of_info, port_name))) { goto error; } - tmp_port = ompi_parse_port (port_name, &tag); - free(tmp_port); + if (!non_mpi) { + tmp_port = ompi_dpm.parse_port (port_name, &tag); + free(tmp_port); + } } - rc = ompi_comm_connect_accept (comm, root, NULL, send_first, &newcomp, tag); + if (non_mpi) { + newcomp = MPI_COMM_NULL; + } else { + rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, &newcomp, tag); + } error: OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpi/c/lookup_name.c b/ompi/mpi/c/lookup_name.c index ee966da911..59c9ec1a0c 100644 --- a/ompi/mpi/c/lookup_name.c +++ b/ompi/mpi/c/lookup_name.c @@ -21,6 +21,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" +#include "ompi/mca/pubsub/pubsub.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Lookup_name = PMPI_Lookup_name @@ -65,7 +66,7 @@ int MPI_Lookup_name(char *service_name, MPI_Info info, char *port_name) * if multiple entries found, this implementation uses * at the moment the first entry. 
*/ - tmp = (char *) ompi_comm_namelookup(service_name); + tmp = (char *) ompi_pubsub.lookup(service_name, info); if ( NULL == tmp ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NAME, FUNC_NAME); diff --git a/ompi/mpi/c/open_port.c b/ompi/mpi/c/open_port.c index 84de7e479c..c673e25de3 100644 --- a/ompi/mpi/c/open_port.c +++ b/ompi/mpi/c/open_port.c @@ -21,6 +21,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" #include "ompi/proc/proc.h" +#include "ompi/mca/dpm/dpm.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Open_port = PMPI_Open_port @@ -61,8 +62,9 @@ int MPI_Open_port(MPI_Info info, char *port_name) */ } + rc = ompi_dpm.open_port(port_name); + OPAL_CR_ENTER_LIBRARY(); - rc = ompi_open_port(port_name); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/publish_name.c b/ompi/mpi/c/publish_name.c index e1748c1733..1f62ebe5c3 100644 --- a/ompi/mpi/c/publish_name.c +++ b/ompi/mpi/c/publish_name.c @@ -21,6 +21,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" +#include "ompi/mca/pubsub/pubsub.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Publish_name = PMPI_Publish_name @@ -62,9 +63,18 @@ int MPI_Publish_name(char *service_name, MPI_Info info, * therefore, we do not parse the info-object at the moment. 
*/ - rc = ompi_comm_namepublish (service_name, port_name); + rc = ompi_pubsub.publish (service_name, info, port_name); OPAL_CR_EXIT_LIBRARY(); if ( OMPI_SUCCESS != rc ) { + if (OMPI_EXISTS == rc) { + /* already exists - can't publish it */ + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_FILE_EXISTS, + FUNC_NAME); + } + + /* none of the MPI-specific errors occurred - must be some + * kind of internal error + */ return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME); } diff --git a/ompi/mpi/c/unpublish_name.c b/ompi/mpi/c/unpublish_name.c index 8f6faabfd2..89f7195e7b 100644 --- a/ompi/mpi/c/unpublish_name.c +++ b/ompi/mpi/c/unpublish_name.c @@ -20,6 +20,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" +#include "ompi/mca/pubsub/pubsub.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Unpublish_name = PMPI_Unpublish_name @@ -60,10 +61,26 @@ int MPI_Unpublish_name(char *service_name, MPI_Info info, * No predefined info-objects for this function in MPI-2, * therefore, we do not parse the info-object at the moment. 
*/ - rc = ompi_comm_nameunpublish(service_name); + rc = ompi_pubsub.unpublish(service_name, info); if ( OMPI_SUCCESS != rc ) { + if (OMPI_ERR_NOT_FOUND == rc) { + /* service couldn't be found */ + OPAL_CR_EXIT_LIBRARY(); + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_SERVICE, + FUNC_NAME); + } + if (OMPI_ERR_PERM == rc) { + /* this process didn't own the specified service */ + OPAL_CR_EXIT_LIBRARY(); + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ACCESS, + FUNC_NAME); + } + + /* none of the MPI-specific errors occurred - must be some + * kind of internal error + */ OPAL_CR_EXIT_LIBRARY(); - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_SERVICE, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME); } diff --git a/ompi/mpi/man/man3/MPI_Comm_spawn.3 b/ompi/mpi/man/man3/MPI_Comm_spawn.3 index c7ad02d45f..c8398eeb31 100644 --- a/ompi/mpi/man/man3/MPI_Comm_spawn.3 +++ b/ompi/mpi/man/man3/MPI_Comm_spawn.3 @@ -95,15 +95,28 @@ The \fIinfo\fP argument is an opaque handle of type MPI_Info in C, MPI::Info in .sp For the SPAWN calls, \fIinfo\fP provides additional, implementation-dependent instructions to MPI and the runtime system on how to start processes. An application may pass MPI_INFO_NULL in C or Fortran. Portable programs not requiring detailed control over process locations should use MPI_INFO_NULL. .sp -The following values for \fIinfo\fP are recognized in Open MPI. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.) +The following keys for \fIinfo\fP are recognized in Open MPI. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.) .sp .nf -Key value Type Description ----------- ---- ----------- +Key Type Description +--- ---- ----------- host char * Host on which the process should be spawned. wdir char * Directory where the executable is located. +ompi_prefix char * Same as the --prefix command line argument + to mpirun. 
+ompi_non_mpi bool If set to true, launching a non-MPI + application; the returned communicator + will be MPI_COMM_NULL. .fi + +\fIbool\fP info keys are actually strings but are evaluated as +follows: if the string value is a number, it is converted to an +integer and cast to a boolean (meaning that zero integers are false +and non-zero values are true). If the string value is +(case-insensitive) "yes" or "true", the boolean is true. If the +string value is (case-insensitive) "no" or "false", the boolean is +false. All other string values are unrecognized, and therefore false. .sp The \fIroot\fP Argument diff --git a/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3 b/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3 index 4da5a9c5de..3e2b62427e 100644 --- a/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3 +++ b/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3 @@ -105,17 +105,37 @@ Error codes are treated as for MPI_Comm_spawn. .SH INFO ARGUMENTS -The following values for \fIinfo\fP are recognized in Open MPI 1.2. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.) +The following keys for \fIinfo\fP are recognized in Open MPI 1.2. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.) .sp .sp .nf -Key value Type Description ----------- ---- ----------- +Key Type Description +--- ---- ----------- host char * Host on which the process should be spawned. wdir char * Directory where the executable is located. +ompi_prefix char * Same as the --prefix command line argument + to mpirun. +ompi_non_mpi bool If set to true, launching a non-MPI + application; the returned communicator + will be MPI_COMM_NULL. .fi - + +.sp +\fIbool\fP info keys are actually strings but are evaluated as +follows: if the string value is a number, it is converted to an +integer and cast to a boolean (meaning that zero integers are false +and non-zero values are true). 
If the string value is +(case-insensitive) "yes" or "true", the boolean is true. If the +string value is (case-insensitive) "no" or "false", the boolean is +false. All other string values are unrecognized, and therefore false. + +.sp +Note that if any of the info handles have \fIompi_non_mpi\fP set to +true, then all info handles must have it set to true. If some are set +to true, but others are set to false (or are unset), MPI_ERR_INFO will +be returned. + .sp Note that in Open MPI 1.2, the first array location in \fIarray_of_info\fP is applied to all the commands in \fIarray_of_commands\fP. diff --git a/ompi/mpi/man/man3/MPI_Lookup_name.3 b/ompi/mpi/man/man3/MPI_Lookup_name.3 index 57eb070e0d..5e70ef1aea 100644 --- a/ompi/mpi/man/man3/MPI_Lookup_name.3 +++ b/ompi/mpi/man/man3/MPI_Lookup_name.3 @@ -35,7 +35,7 @@ service_name A service name (string). .TP 1.4i info -Options to the name service functions (handle). No options currently supported. +Options to the name service functions (handle). .SH OUTPUT PARAMETERS .ft R @@ -52,9 +52,37 @@ This function retrieves a \fIport_name\fP published under \fIservice_name\fP by a previous invocation of MPI_Publish_name. The application must supply a \fIport_name\fP buffer large enough to hold the largest possible port name (i.e., MPI_MAX_PORT_NAME bytes). + +.SH INFO ARGUMENTS +The following keys for \fIinfo\fP are recognized: .sp -The \fIinfo\fP parameter should be MPI_INFO_NULL, as this routine does -not parse any MPI Info arguments. +.sp +.nf +Key Type Description +--- ---- ----------- + +ompi_lookup_order char * Resolution order for name lookup. +.fi + +The \fIompi_lookup_order\fP info key can specify one of four valid +string values (see the NAME SCOPE section below for more information +on name scopes): + +.TP 4 +\fIlocal\fP: Only search the local scope for name resolution. +.TP 4 +\fIglobal\fP: Only search the global scope for name resolution. +.TP 4 +\fIlocal,global\fP: Search the local scope for name resolution. 
If +not found, try searching the global scope for name resolution. This +behavior is the default if the \fIompi_lookup_order\fP info key is not +specified. +.TP 4 +\fIglobal,local\fP: Search the global scope for name resolution. If +not found, try searching the local scope for name resolution. + +.SH NAME SCOPE +RHC fill in here :-) .SH ERRORS .ft R diff --git a/ompi/mpi/man/man3/MPI_Publish_name.3 b/ompi/mpi/man/man3/MPI_Publish_name.3 index 77741ee920..84f32f8ce2 100644 --- a/ompi/mpi/man/man3/MPI_Publish_name.3 +++ b/ompi/mpi/man/man3/MPI_Publish_name.3 @@ -35,7 +35,7 @@ service_name A service name (string). .TP 1.4i info -Options to the name service functions (handle). No options currently supported. +Options to the name service functions (handle). .ft R .TP 1.4i port_name @@ -53,9 +53,32 @@ an application may retrieve \fIport_name\fP by calling MPI_Lookup_name with \fIservice_name\fP as an argument. It is an error to publish the same \fIservice_name\fP twice, or to use a \fIport_name\fP argument that was not previously opened by the calling process via a call to MPI_Open_port. + +.SH INFO ARGUMENTS +The following keys for \fIinfo\fP are recognized: .sp -The \fIinfo\fP parameter should be MPI_INFO_NULL, as this routine does -not parse any MPI Info arguments. +.sp +.nf +Key Type Description +--- ---- ----------- + +ompi_global_scope bool If set to true, publish the name in + the global scope. Publish in the local + scope otherwise. See the NAME SCOPE + section for more details. +.fi + +.sp +\fIbool\fP info keys are actually strings but are evaluated as +follows: if the string value is a number, it is converted to an +integer and cast to a boolean (meaning that zero integers are false +and non-zero values are true). If the string value is +(case-insensitive) "yes" or "true", the boolean is true. If the +string value is (case-insensitive) "no" or "false", the boolean is +false. All other string values are unrecognized, and therefore false. 
+ +.SH NAME SCOPE +RHC fill in here :-) .SH ERRORS .ft R diff --git a/ompi/mpi/man/man3/MPI_Unpublish_name.3 b/ompi/mpi/man/man3/MPI_Unpublish_name.3 index 6947655746..68072161fd 100644 --- a/ompi/mpi/man/man3/MPI_Unpublish_name.3 +++ b/ompi/mpi/man/man3/MPI_Unpublish_name.3 @@ -35,7 +35,7 @@ service_name A service name (string). .TP 1.4i info -Options to the name service functions (handle). No options currently supported. +Options to the name service functions (handle). .ft R .TP 1.4i port_name @@ -54,9 +54,33 @@ MPI_Lookup_name. It is an error to unpublish a \fIservice_name\fP that was not published via MPI_Publish_name. Both the \fIservice_name\fP and \fIport_name\fP arguments to MPI_Unpublish_name must be identical to the arguments to the previous call to MPI_Publish_name. + +.SH INFO ARGUMENTS +The following keys for \fIinfo\fP are recognized: .sp -The \fIinfo\fP parameter should be MPI_INFO_NULL, as this routine does -not parse any MPI Info arguments. +.sp +.nf +Key Type Description +--- ---- ----------- + +ompi_global_scope bool If set to true, unpublish the name from + the global scope. Unpublish from the local + scope otherwise. See the NAME SCOPE + section for more details. + +.fi + +.sp +\fIbool\fP info keys are actually strings but are evaluated as +follows: if the string value is a number, it is converted to an +integer and cast to a boolean (meaning that zero integers are false +and non-zero values are true). If the string value is +(case-insensitive) "yes" or "true", the boolean is true. If the +string value is (case-insensitive) "no" or "false", the boolean is +false. All other string values are unrecognized, and therefore false. 
+ +.SH NAME SCOPE +RHC fill in here :-) .SH ERRORS .ft R diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index dcfe05f7e0..e2a9d67bbf 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -24,12 +24,14 @@ #include "opal/threads/mutex.h" #include "opal/util/output.h" #include "opal/util/show_help.h" + #include "orte/util/sys_info.h" -#include "orte/dss/dss.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" +#include "opal/dss/dss.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/proc_info.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "ompi/proc/proc.h" #include "ompi/mca/pml/pml.h" #include "ompi/datatype/dt_arch.h" @@ -57,7 +59,6 @@ void ompi_proc_construct(ompi_proc_t* proc) { proc->proc_bml = NULL; proc->proc_pml = NULL; - proc->proc_modex = NULL; OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t); /* By default all processors are supposelly having the same architecture as me. Thus, @@ -82,9 +83,6 @@ void ompi_proc_construct(ompi_proc_t* proc) void ompi_proc_destruct(ompi_proc_t* proc) { - if (proc->proc_modex != NULL) { - OBJ_RELEASE(proc->proc_modex); - } /* As all the convertors are created with OBJ_NEW we can just call OBJ_RELEASE. All, except * the local convertor, will get destroyed at some point here. 
If the reference count is correct * the local convertor (who has the reference count increased in the datatype) will not get @@ -103,28 +101,23 @@ void ompi_proc_destruct(ompi_proc_t* proc) int ompi_proc_init(void) { - orte_process_name_t *peers; - orte_std_cntr_t i, npeers; + orte_vpid_t i; int rc; uint32_t ui32; OBJ_CONSTRUCT(&ompi_proc_list, opal_list_t); OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t); - /* create a shell of a proc structure for every proc in MPI_COMM_WORLD */ - if(ORTE_SUCCESS != (rc = orte_ns.get_peers(&peers, &npeers, NULL))) { - opal_output(0, "ompi_proc_init: get_peers failed with errno=%d", rc); - return rc; - } - for( i = 0; i < npeers; i++ ) { + /* create proc structures and find self */ + for( i = 0; i < orte_process_info.num_procs; i++ ) { ompi_proc_t *proc = OBJ_NEW(ompi_proc_t); - proc->proc_name = peers[i]; + proc->proc_name.jobid = ORTE_PROC_MY_NAME->jobid; + proc->proc_name.vpid = i; if( i == ORTE_PROC_MY_NAME->vpid ) { ompi_proc_local_proc = proc; proc->proc_flags |= OMPI_PROC_FLAG_LOCAL; } } - free(peers); /* Fill in our local information */ rc = ompi_arch_compute_local_id(&ui32); @@ -157,11 +150,11 @@ int ompi_proc_publish_info(void) { orte_std_cntr_t datalen; void *data; - orte_buffer_t* buf; + opal_buffer_t* buf; int rc; /* pack our local data for others to use */ - buf = OBJ_NEW(orte_buffer_t); + buf = OBJ_NEW(opal_buffer_t); rc = ompi_proc_pack(&ompi_proc_local_proc, 1, buf); if (OMPI_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -169,7 +162,7 @@ int ompi_proc_publish_info(void) } /* send our data into the ether */ - rc = orte_dss.unload(buf, &data, &datalen); + rc = opal_dss.unload(buf, &data, &datalen); if (OMPI_SUCCESS != rc) return rc; OBJ_RELEASE(buf); @@ -197,9 +190,9 @@ ompi_proc_get_info(void) char *hostname; void *data; size_t datalen; - orte_nodeid_t nodeid; + orte_vpid_t nodeid; - if (ORTE_EQUAL != orte_ns.compare_fields(ORTE_NS_CMP_JOBID, + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_JOBID, 
&ompi_proc_local_proc->proc_name, &proc->proc_name)) { /* not in our jobid -- this shouldn't happen */ @@ -209,12 +202,12 @@ ompi_proc_get_info(void) ret = ompi_modex_recv_string("ompi-proc-info", proc, &data, &datalen); if (OMPI_SUCCESS == ret) { - orte_buffer_t *buf; + opal_buffer_t *buf; orte_std_cntr_t count=1; orte_process_name_t name; - buf = OBJ_NEW(orte_buffer_t); - ret = orte_dss.load(buf, data, datalen); + buf = OBJ_NEW(opal_buffer_t); + ret = opal_dss.load(buf, data, datalen); if (OMPI_SUCCESS != ret) goto out; @@ -222,23 +215,23 @@ ompi_proc_get_info(void) could, in theory, use the unpack code on this proc. We don't,because we aren't adding procs, but need to update them */ - ret = orte_dss.unpack(buf, &name, &count, ORTE_NAME); + ret = opal_dss.unpack(buf, &name, &count, ORTE_NAME); if (ret != ORTE_SUCCESS) goto out; - ret = orte_dss.unpack(buf, &nodeid, &count, ORTE_NODEID); + ret = opal_dss.unpack(buf, &nodeid, &count, ORTE_VPID); if (ret != ORTE_SUCCESS) { ORTE_ERROR_LOG(ret); goto out; } - ret = orte_dss.unpack(buf, &arch, &count, ORTE_UINT32); + ret = opal_dss.unpack(buf, &arch, &count, OPAL_UINT32); if (ret != ORTE_SUCCESS) { ORTE_ERROR_LOG(ret); goto out; } - ret = orte_dss.unpack(buf, &hostname, &count, ORTE_STRING); + ret = opal_dss.unpack(buf, &hostname, &count, OPAL_STRING); if (ret != ORTE_SUCCESS) { ORTE_ERROR_LOG(ret); goto out; @@ -324,7 +317,7 @@ ompi_proc_t** ompi_proc_world(size_t *size) for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); proc = (ompi_proc_t*)opal_list_get_next(proc)) { - if (ORTE_EQUAL == orte_ns.compare_fields(mask, &proc->proc_name, &my_name)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, &my_name)) { ++count; } } @@ -340,7 +333,7 @@ ompi_proc_t** ompi_proc_world(size_t *size) for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); proc = 
(ompi_proc_t*)opal_list_get_next(proc)) { - if (ORTE_EQUAL == orte_ns.compare_fields(mask, &proc->proc_name, &my_name)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, &my_name)) { procs[count++] = proc; } } @@ -398,7 +391,7 @@ ompi_proc_t * ompi_proc_find ( const orte_process_name_t * name ) for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); proc = (ompi_proc_t*)opal_list_get_next(proc)) { - if (ORTE_EQUAL == orte_ns.compare_fields(mask, &proc->proc_name, name)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, name)) { rproc = proc; break; } @@ -421,7 +414,7 @@ ompi_proc_find_and_add(const orte_process_name_t * name, bool* isnew) for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); proc = (ompi_proc_t*)opal_list_get_next(proc)) { - if (ORTE_EQUAL == orte_ns.compare_fields(mask, &proc->proc_name, name)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, name)) { rproc = proc; *isnew = false; break; @@ -445,31 +438,31 @@ ompi_proc_find_and_add(const orte_process_name_t * name, bool* isnew) int -ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, orte_buffer_t* buf) +ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, opal_buffer_t* buf) { int i, rc; OPAL_THREAD_LOCK(&ompi_proc_lock); for (i=0; iproc_name), 1, ORTE_NAME); + rc = opal_dss.pack(buf, &(proclist[i]->proc_name), 1, ORTE_NAME); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); OPAL_THREAD_UNLOCK(&ompi_proc_lock); return rc; } - rc = orte_dss.pack(buf, &(proclist[i]->proc_nodeid), 1, ORTE_NODEID); + rc = opal_dss.pack(buf, &(proclist[i]->proc_nodeid), 1, ORTE_VPID); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); OPAL_THREAD_UNLOCK(&ompi_proc_lock); return rc; } - rc = orte_dss.pack(buf, &(proclist[i]->proc_arch), 1, ORTE_UINT32); + rc = opal_dss.pack(buf, &(proclist[i]->proc_arch), 1, 
OPAL_UINT32); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); OPAL_THREAD_UNLOCK(&ompi_proc_lock); return rc; } - rc = orte_dss.pack(buf, &(proclist[i]->proc_hostname), 1, ORTE_STRING); + rc = opal_dss.pack(buf, &(proclist[i]->proc_hostname), 1, OPAL_STRING); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); OPAL_THREAD_UNLOCK(&ompi_proc_lock); @@ -482,7 +475,7 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, orte_buffer_t* buf) int -ompi_proc_unpack(orte_buffer_t* buf, +ompi_proc_unpack(opal_buffer_t* buf, int proclistsize, ompi_proc_t ***proclist, int *newproclistsize, ompi_proc_t ***newproclist) { @@ -509,24 +502,24 @@ ompi_proc_unpack(orte_buffer_t* buf, char *new_hostname; bool isnew = false; int rc; - orte_nodeid_t new_nodeid; + orte_vpid_t new_nodeid; - rc = orte_dss.unpack(buf, &new_name, &count, ORTE_NAME); + rc = opal_dss.unpack(buf, &new_name, &count, ORTE_NAME); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_dss.unpack(buf, &new_nodeid, &count, ORTE_NODEID); + rc = opal_dss.unpack(buf, &new_nodeid, &count, ORTE_VPID); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_dss.unpack(buf, &new_arch, &count, ORTE_UINT32); + rc = opal_dss.unpack(buf, &new_arch, &count, OPAL_UINT32); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_dss.unpack(buf, &new_hostname, &count, ORTE_STRING); + rc = opal_dss.unpack(buf, &new_hostname, &count, OPAL_STRING); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; @@ -572,3 +565,59 @@ ompi_proc_unpack(orte_buffer_t* buf, *proclist = plist; return OMPI_SUCCESS; } + +int ompi_proc_refresh(void) { + ompi_proc_t *proc = NULL; + opal_list_item_t *item = NULL; + orte_vpid_t i = 0; + int rc; + uint32_t ui32; + + OPAL_THREAD_LOCK(&ompi_proc_lock); + + for( item = opal_list_get_first(&ompi_proc_list), i = 0; + item != opal_list_get_end(&ompi_proc_list); + item = opal_list_get_next(item), ++i ) { + proc = (ompi_proc_t*)item; + + /* Does not change: 
orte_process_info.num_procs */ + /* Does not change: proc->proc_name.vpid */ + proc->proc_name.jobid = ORTE_PROC_MY_NAME->jobid; + if( i == ORTE_PROC_MY_NAME->vpid ) { + ompi_proc_local_proc = proc; + proc->proc_flags |= OMPI_PROC_FLAG_LOCAL; + } else { + proc->proc_flags = 0; + } + } + + /* Fill in our local information */ + rc = ompi_arch_compute_local_id(&ui32); + if (OMPI_SUCCESS != rc) { + return rc; + } + + ompi_proc_local_proc->proc_nodeid = orte_system_info.nodeid; + ompi_proc_local_proc->proc_arch = ui32; + if (ompi_mpi_keep_peer_hostnames) { + if (ompi_mpi_keep_fqdn_hostnames) { + /* use the entire FQDN name */ + ompi_proc_local_proc->proc_hostname = strdup(orte_system_info.nodename); + } else { + /* use the unqualified name */ + char *tmp, *ptr; + tmp = strdup(orte_system_info.nodename); + if (NULL != (ptr = strchr(tmp, '.'))) { + *ptr = '\0'; + } + ompi_proc_local_proc->proc_hostname = strdup(tmp); + free(tmp); + } + } + + rc = ompi_proc_publish_info(); + + OPAL_THREAD_UNLOCK(&ompi_proc_lock); + + return rc; +} diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h index f148ef4376..8bf52a83ee 100644 --- a/ompi/proc/proc.h +++ b/ompi/proc/proc.h @@ -33,10 +33,10 @@ #include "ompi/types.h" #include "opal/class/opal_list.h" -#include "orte/dss/dss_types.h" #include "opal/threads/mutex.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" +#include "opal/dss/dss_types.h" BEGIN_C_DECLS @@ -54,14 +54,12 @@ struct ompi_proc_t { opal_list_item_t super; /** this process' name */ orte_process_name_t proc_name; - /** "nodeid" on which the proc resides */ - orte_nodeid_t proc_nodeid; + /** "nodeid" on which the proc resides - equiv to vpid of local daemon */ + orte_vpid_t proc_nodeid; /** PML specific proc data */ struct mca_pml_base_endpoint_t* proc_pml; /** BML specific proc data */ struct mca_bml_base_endpoint_t* proc_bml; - /** MCA module exchange data */ - opal_object_t* proc_modex; /** architecture of this process */ uint32_t proc_arch; /** Base convertor 
for the proc described by this process */ @@ -117,7 +115,7 @@ OMPI_DECLSPEC extern ompi_proc_t* ompi_proc_local_proc; * the conclusion of the stage gate. * * @retval OMPI_SUCESS System successfully initialized - * @retval OMPI_ERRROR Initialization failed due to unspecified error + * @retval OMPI_ERROR Initialization failed due to unspecified error */ int ompi_proc_init(void); @@ -135,7 +133,7 @@ int ompi_proc_init(void); * the conclusion of the stage gate. * * @retval OMPI_SUCESS Information available in the modex - * @retval OMPI_ERRROR Failure due to unspecified error + * @retval OMPI_ERROR Failure due to unspecified error */ int ompi_proc_publish_info(void); @@ -271,8 +269,8 @@ OMPI_DECLSPEC ompi_proc_t * ompi_proc_find ( const orte_process_name_t* name ); * @retval OMPI_SUCCESS Success * @retval OMPI_ERROR Unspecified error */ -int ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, - orte_buffer_t *buf); +OMPI_DECLSPEC int ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, + opal_buffer_t *buf); /** @@ -314,11 +312,26 @@ int ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, * OMPI_SUCCESS on success * OMPI_ERROR else */ -int ompi_proc_unpack(orte_buffer_t *buf, - int proclistsize, ompi_proc_t ***proclist, - int *newproclistsize, ompi_proc_t ***newproclist); +OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf, + int proclistsize, ompi_proc_t ***proclist, + int *newproclistsize, ompi_proc_t ***newproclist); +/** + * Refresh the OMPI process subsystem + * + * Refresh the Open MPI process subsystem. This function will update + * the list of proc instances in the current MPI_COMM_WORLD with + * data from the run-time environment. + * + * @note This is primarily used when restarting a process and thus + * needs to update the jobid and node name.
+ * + * @retval OMPI_SUCCESS System successfully refreshed + * @retval OMPI_ERROR Refresh failed due to unspecified error + */ +int ompi_proc_refresh(void); + END_C_DECLS #endif /* OMPI_PROC_PROC_H */ diff --git a/ompi/runtime/ompi_cr.c b/ompi/runtime/ompi_cr.c index 7d2ca5d9bd..dc52a93658 100644 --- a/ompi/runtime/ompi_cr.c +++ b/ompi/runtime/ompi_cr.c @@ -49,7 +49,7 @@ #include "orte/util/proc_info.h" #include "orte/mca/snapc/snapc.h" #include "orte/mca/snapc/base/base.h" -#include "orte/mca/smr/smr.h" +#include "orte/runtime/runtime.h" #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" @@ -336,12 +336,14 @@ static int ompi_cr_coord_post_restart(void) { opal_output_verbose(10, ompi_cr_output, "ompi_cr: coord_post_restart: ompi_cr_coord_post_restart()"); +#if 0 /* register myself to require that I finalize before exiting */ - if (ORTE_SUCCESS != (ret = orte_smr.register_sync())) { + if (ORTE_SUCCESS != (ret = orte_register_sync())) { exit_status = ret; goto cleanup; } - +#endif + /* * Notify PML * - Will notify BML and BTL's diff --git a/ompi/runtime/ompi_module_exchange.c b/ompi/runtime/ompi_module_exchange.c index 2b4acdfadb..b83e9271db 100644 --- a/ompi/runtime/ompi_module_exchange.c +++ b/ompi/runtime/ompi_module_exchange.c @@ -19,602 +19,30 @@ */ #include "ompi_config.h" +#include "ompi/constants.h" -#include "ompi/proc/proc.h" -#include "opal/threads/condition.h" -#include "opal/util/output.h" -#include "orte/util/proc_info.h" -#include "orte/class/orte_proc_table.h" - -#include "orte/dss/dss.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/schema/schema.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/gpr/base/base.h" -#include "orte/mca/ns/ns.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" + +#include "orte/mca/grpcomm/grpcomm.h" + +#include "ompi/proc/proc.h" #include "ompi/runtime/ompi_module_exchange.h" -/** - * @file - * - *
MODEX DESIGN - * - * Modex data is always associated with a given orte process name, in - * an orte hash table. A backpointer is kept on an ompi_proc_t for - * fast access. The hash table is necessary because modex data is - * received from the GPR for entire jobids and when working with - * dynamic processes, it is possible we will receive data for a - * process not yet in the ompi_proc_all() list of process. This - * information must be kept for later use, because if accept/connect - * causes the proc to be added to the ompi_proc_all() list, the - * subscription to the mdoex information can not be reliably fired - * without causing a potential connection storm. Therefore, we use an - * orte_proc_table backing store to contain all modex information. - * Backpointers are provided from the ompi_proc_t structure to improve - * lookup performance in the common case. - * - * While we could add the now discovered proc into the ompi_proc_all() - * list, this has some problems, in that we don't have the - * architecture and hostname information needed to properly fill in - * the ompi_proc_t structure and we don't want to cause GPR - * communication to get it when we dont' really need to know anything - * about the remote proc. - * - * All data put into the modex (or received from the modex) is - * associated with a given proc,component pair. 
The data structures - * to maintain this data look something like: - * - * orte_hash_table_t ompi_modex_data -> list of ompi_modex_proc_t objects - * - * +-----------------------------+ - * | ompi_modex_proc_data_t | - * | - opal_list_item_t | - * +-----------------------------+ - * | opal_mutex_t modex_lock | - * | opal_condition_t modex_cond | - * | bool modex_received_data | 1 - * | opal_list_t modules | ---------+ - * +-----------------------------+ | - * * | - * +--------------------------------+ <--------+ - * | ompi_modex_module_data_t | - * | - opal_list_item_t | - * +--------------------------------+ - * | mca_base_component_t component | - * | void *module_data | - * | size_t module_data_size | 1 - * | opal_list_t module_cbs | ---------+ - * +--------------------------------+ | - * * | - * +---------------------------+ <--------+ - * | ompi_modex_cb_t | - * | - opal_list_item_t | - * +---------------------------+ - * | ompi_modex_cb_fn_t cbfunc | - * | void *cbdata | - * +---------------------------+ - * - * In order to maintain subscriptions to the registry for modex - * information, a list of all active subscriptions is maintained as a - * list (ompi_modex_subscriptions) of ompi_modex_subscription_t - * structures. The structure contains the jobid used in the - * subscription. - */ - - -/** - * Modex data for a particular orte process - * - * Locking infrastructure and list of module data for a given orte - * process name. The name association is maintained in the - * ompi_modex_proc_list hash table. - */ -struct ompi_modex_proc_data_t { - /** Structure can be put on lists (including in hash tables) */ - opal_list_item_t super; - /* Lock held whenever the modex data for this proc is being - modified */ - opal_mutex_t modex_lock; - /* Condition variable used when blocking on data from this - process. Should be signalled whenever data is updated for this - process. 
*/ - opal_condition_t modex_cond; - /* True if modex data has ever been received from this process, - false otherwise. */ - bool modex_received_data; - /* List of ompi_modex_module_data_t structures containing all data - received from this process, sorted by component name. */ - opal_list_t modex_module_data; -}; -typedef struct ompi_modex_proc_data_t ompi_modex_proc_data_t; - -static void -ompi_modex_construct(ompi_modex_proc_data_t * modex) -{ - OBJ_CONSTRUCT(&modex->modex_lock, opal_mutex_t); - OBJ_CONSTRUCT(&modex->modex_cond, opal_condition_t); - modex->modex_received_data = false; - OBJ_CONSTRUCT(&modex->modex_module_data, opal_list_t); -} - -static void -ompi_modex_destruct(ompi_modex_proc_data_t * modex) -{ - OBJ_DESTRUCT(&modex->modex_module_data); - OBJ_DESTRUCT(&modex->modex_cond); - OBJ_DESTRUCT(&modex->modex_lock); -} - -OBJ_CLASS_INSTANCE(ompi_modex_proc_data_t, opal_object_t, - ompi_modex_construct, ompi_modex_destruct); - - - -/** - * Modex data for a particular component name - * - * Container for data for a particular proc,component pair. This - * structure should be contained in the modules list in an - * ompi_modex_proc_data_t structure to maintain an association with a - * given proc. The list is then searched for a matching component - * name. - * - * While searching the list or reading from (or writing to) this - * structure, the lock in the proc_data_t should be held. - */ -struct ompi_modex_module_data_t { - /** Structure can be put on lists */ - opal_list_item_t super; - /** Component information for this data */ - mca_base_component_t component; - /** Binary blob of data associated with this proc,component pair */ - void *module_data; - /** Size (in bytes) of module_data */ - size_t module_data_size; - /** callbacks that should be fired when module_data changes. 
*/ - opal_list_t module_cbs; -}; -typedef struct ompi_modex_module_data_t ompi_modex_module_data_t; - -static void -ompi_modex_module_construct(ompi_modex_module_data_t * module) -{ - memset(&module->component, 0, sizeof(module->component)); - module->module_data = NULL; - module->module_data_size = 0; - OBJ_CONSTRUCT(&module->module_cbs, opal_list_t); -} - -static void -ompi_modex_module_destruct(ompi_modex_module_data_t * module) -{ - opal_list_item_t *item; - while (NULL != (item = opal_list_remove_first(&module->module_cbs))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&module->module_cbs); -} - -OBJ_CLASS_INSTANCE(ompi_modex_module_data_t, - opal_list_item_t, - ompi_modex_module_construct, - ompi_modex_module_destruct); - -/** - * Callback data for modex updates - * - * Data container for update callbacks that should be fired whenever a - * given proc,component pair has a modex data update. - */ -struct ompi_modex_cb_t { - opal_list_item_t super; - ompi_modex_cb_fn_t cbfunc; - void *cbdata; -}; -typedef struct ompi_modex_cb_t ompi_modex_cb_t; - -OBJ_CLASS_INSTANCE(ompi_modex_cb_t, - opal_list_item_t, - NULL, - NULL); - - - -/** - * Global modex list of proc data - * - * Global bhash table associating orte_process_name_t values with an - * ompi_modex_proc_data_t container. - * - * \note The ompi_modex_lock mutex should be held whenever this list - * is being updated or searched. - */ -static opal_hash_table_t ompi_modex_data; - -/** - * Global modex lock - * - * Global lock for modex usage, particularily protecting the - * ompi_modex_subscriptions list and the ompi_modex_data hash table. 
- */ -static opal_mutex_t ompi_modex_lock; - -static opal_mutex_t ompi_modex_string_lock; - -/* - * Global buffer we use to collect modex info for later - * transmission - */ -static orte_buffer_t ompi_modex_buffer; -static orte_std_cntr_t ompi_modex_num_entries; - - -int -ompi_modex_init(void) -{ - OBJ_CONSTRUCT(&ompi_modex_data, opal_hash_table_t); - OBJ_CONSTRUCT(&ompi_modex_lock, opal_mutex_t); - OBJ_CONSTRUCT(&ompi_modex_string_lock, opal_mutex_t); - - OBJ_CONSTRUCT(&ompi_modex_buffer, orte_buffer_t); - ompi_modex_num_entries = 0; - - opal_hash_table_init(&ompi_modex_data, 256); - - return OMPI_SUCCESS; -} - - -int -ompi_modex_finalize(void) -{ - opal_hash_table_remove_all(&ompi_modex_data); - OBJ_DESTRUCT(&ompi_modex_data); - - OBJ_DESTRUCT(&ompi_modex_string_lock); - OBJ_DESTRUCT(&ompi_modex_lock); - OBJ_DESTRUCT(&ompi_modex_buffer); - - return OMPI_SUCCESS; -} - - -/** - * Find data for a given component in a given modex_proc_data_t - * container. - * - * Find data for a given component in a given modex_proc_data_t - * container. The proc_data's modex_lock must be held during this - * search. 
- */ -static ompi_modex_module_data_t * -ompi_modex_lookup_module(ompi_modex_proc_data_t *proc_data, - mca_base_component_t *component, - bool create_if_not_found) -{ - ompi_modex_module_data_t *module_data = NULL; - for (module_data = (ompi_modex_module_data_t *) opal_list_get_first(&proc_data->modex_module_data); - module_data != (ompi_modex_module_data_t *) opal_list_get_end(&proc_data->modex_module_data); - module_data = (ompi_modex_module_data_t *) opal_list_get_next(module_data)) { - if (mca_base_component_compatible(&module_data->component, component) == 0) { - return module_data; - } - } - - if (create_if_not_found) { - module_data = OBJ_NEW(ompi_modex_module_data_t); - if (NULL == module_data) return NULL; - - memcpy(&module_data->component, component, sizeof(mca_base_component_t)); - opal_list_append(&proc_data->modex_module_data, &module_data->super); - - return module_data; - } - - return NULL; -} - - -/** - * Find ompi_modex_proc_data_t container associated with given - * orte_process_name_t. - * - * Find ompi_modex_proc_data_t container associated with given - * orte_process_name_t. The global lock should *NOT* be held when - * calling this function. 
- */ -static ompi_modex_proc_data_t* -ompi_modex_lookup_orte_proc(orte_process_name_t *orte_proc) -{ - ompi_modex_proc_data_t *proc_data; - - OPAL_THREAD_LOCK(&ompi_modex_lock); - proc_data = (ompi_modex_proc_data_t*) - orte_hash_table_get_proc(&ompi_modex_data, orte_proc); - if (NULL == proc_data) { - /* The proc clearly exists, so create a modex structure - for it */ - proc_data = OBJ_NEW(ompi_modex_proc_data_t); - if (NULL == proc_data) { - opal_output(0, "ompi_modex_lookup_orte_proc: unable to allocate ompi_modex_proc_data_t\n"); - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return NULL; - } - orte_hash_table_set_proc(&ompi_modex_data, orte_proc, proc_data); - } - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - - return proc_data; -} - - -/** - * Find ompi_modex_proc_data_t container associated with given ompi_proc_t - * - * Find ompi_modex_proc_data_t container associated with given - * ompi_proc_t. The global lock should *NOT* be held when calling - * this function. - */ -static ompi_modex_proc_data_t* -ompi_modex_lookup_proc(ompi_proc_t *proc) -{ - ompi_modex_proc_data_t *proc_data = - (ompi_modex_proc_data_t *) proc->proc_modex; - - if (NULL == proc_data) { - proc_data = ompi_modex_lookup_orte_proc(&proc->proc_name); - if (NULL == proc_data) return NULL; - - /* set the association with the ompi_proc, if not already done. 
*/ - OPAL_THREAD_LOCK(&ompi_modex_lock); - if (NULL == proc->proc_modex) { - OBJ_RETAIN(proc_data); - proc->proc_modex = &proc_data->super.super; - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - } else { - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - } - } - - return proc_data; -} - - -/** - * Get the local buffer's data - */ -int -ompi_modex_get_my_buffer(orte_buffer_t *buf) -{ - int rc; - - OPAL_THREAD_LOCK(&ompi_modex_lock); - /* put our process name in the buffer so it can be unpacked later */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return rc; - } - - /* put the number of entries into the buffer */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &ompi_modex_num_entries, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return rc; - } - - /* if there are entries, copy the data across */ - if (0 < ompi_modex_num_entries) { - if (ORTE_SUCCESS != (orte_dss.copy_payload(buf, &ompi_modex_buffer))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return rc; - } - } - - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return ORTE_SUCCESS; -} - -/** - * Process modex data - */ -int -ompi_modex_process_data(orte_buffer_t *buf) -{ - orte_std_cntr_t i, j, num_procs, num_entries; - opal_list_item_t *item; - void *bytes = NULL; - orte_std_cntr_t cnt; - orte_process_name_t proc_name; - ompi_modex_proc_data_t *proc_data; - ompi_modex_module_data_t *module_data; - mca_base_component_t component; - int rc; - - /* extract the number of entries in the buffer */ - cnt=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &num_procs, &cnt, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* process the buffer */ - for (i=0; i < num_procs; i++) { - /* unpack the process name */ - cnt=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &proc_name, &cnt, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* look up the modex data 
structure */ - proc_data = ompi_modex_lookup_orte_proc(&proc_name); - if (proc_data == NULL) { - /* report the error */ - opal_output(0, "ompi_modex_process_data: received modex info for unknown proc %s\n", - ORTE_NAME_PRINT(&proc_name)); - return OMPI_ERR_NOT_FOUND; - } - - /* unpack the number of entries for this proc */ - cnt=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &num_entries, &cnt, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - OPAL_THREAD_LOCK(&proc_data->modex_lock); - - /* - * Extract the component name and version - since there is one for each - * component type/name/version - process them all - */ - for (j = 0; j < num_entries; j++) { - size_t num_bytes; - char *ptr; - - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &ptr, &cnt, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - strcpy(component.mca_type_name, ptr); - free(ptr); - - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &ptr, &cnt, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - strcpy(component.mca_component_name, ptr); - free(ptr); - - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, - &component.mca_component_major_version, &cnt, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, - &component.mca_component_minor_version, &cnt, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &num_bytes, &cnt, ORTE_SIZE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (num_bytes != 0) { - if (NULL == (bytes = malloc(num_bytes))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - cnt = (orte_std_cntr_t) num_bytes; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, bytes, &cnt, ORTE_BYTE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - num_bytes = cnt; - } else { - bytes = NULL; - } - - /* - * Lookup the corresponding modex structure - */ - if (NULL == (module_data = 
ompi_modex_lookup_module(proc_data, - &component, - true))) { - opal_output(0, "ompi_modex_process_data: ompi_modex_lookup_module failed\n"); - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - return OMPI_ERR_NOT_FOUND; - } - module_data->module_data = bytes; - module_data->module_data_size = num_bytes; - proc_data->modex_received_data = true; - opal_condition_signal(&proc_data->modex_cond); - - if (opal_list_get_size(&module_data->module_cbs)) { - ompi_proc_t *proc = ompi_proc_find(&proc_name); - - if (NULL != proc) { - OPAL_THREAD_LOCK(&proc->proc_lock); - /* call any registered callbacks */ - for (item = opal_list_get_first(&module_data->module_cbs); - item != opal_list_get_end(&module_data->module_cbs); - item = opal_list_get_next(item)) { - ompi_modex_cb_t *cb = (ompi_modex_cb_t *) item; - cb->cbfunc(&module_data->component, - proc, bytes, num_bytes, cb->cbdata); - } - OPAL_THREAD_UNLOCK(&proc->proc_lock); - } - } - } - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - } - - return OMPI_SUCCESS; -} - - int ompi_modex_send(mca_base_component_t * source_component, - const void *data, - size_t size) + const void *data, size_t size) { int rc; - char *ptr; - - OPAL_THREAD_LOCK(&ompi_modex_lock); + char * name = mca_base_component_to_string(source_component); - /* Pack the component name information into the local buffer */ - ptr = source_component->mca_type_name; - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &ptr, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - ptr = source_component->mca_component_name; - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &ptr, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &source_component->mca_component_major_version, 1, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &source_component->mca_component_minor_version, 1, ORTE_INT32))) { - 
ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &size, 1, ORTE_SIZE))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* Pack the actual data into the buffer */ - if (0 != size) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, (void *) data, size, ORTE_BYTE))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } + if(NULL == name) { + return OMPI_ERR_OUT_OF_RESOURCE; } - /* track the number of entries */ - ++ompi_modex_num_entries; - - cleanup: - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - + rc = orte_grpcomm.set_proc_attr(name, data, size); + free(name); return rc; } @@ -625,108 +53,23 @@ ompi_modex_recv(mca_base_component_t * component, void **buffer, size_t * size) { - ompi_modex_proc_data_t *proc_data; - ompi_modex_module_data_t *module_data; - - /* make sure we could possibly have modex data */ - if (0 == strcmp(orte_gpr_base_selected_component.gpr_version.mca_component_name, - "null")) { - return OMPI_ERR_NOT_IMPLEMENTED; - } + int rc; + char * name = mca_base_component_to_string(component); - proc_data = ompi_modex_lookup_proc(proc); - if (NULL == proc_data) return OMPI_ERR_NOT_FOUND; - - OPAL_THREAD_LOCK(&proc_data->modex_lock); - - /* wait until data is available */ - while (proc_data->modex_received_data == false) { - opal_condition_wait(&proc_data->modex_cond, &proc_data->modex_lock); - } - - /* look up module */ - module_data = ompi_modex_lookup_module(proc_data, component, false); - - /* copy the data out to the user */ - if ((NULL == module_data) || - (module_data->module_data_size == 0)) { - opal_output(0, "modex recv: no module avail or zero byte size"); - *buffer = NULL; - *size = 0; - } else { - void *copy = malloc(module_data->module_data_size); - - if (copy == NULL) { - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - memcpy(copy, module_data->module_data, module_data->module_data_size); - *buffer = copy; - *size = module_data->module_data_size; - } - 
OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - - return OMPI_SUCCESS; -} - - -int -ompi_modex_recv_nb(mca_base_component_t *component, - ompi_proc_t *proc, - ompi_modex_cb_fn_t cbfunc, - void *cbdata) -{ - ompi_modex_proc_data_t *proc_data; - ompi_modex_module_data_t *module_data; - ompi_modex_cb_t *cb; - - proc_data = ompi_modex_lookup_proc(proc); - if (NULL == proc_data) return OMPI_ERR_NOT_FOUND; - - OPAL_THREAD_LOCK(&proc_data->modex_lock); - - /* lookup / create module */ - module_data = ompi_modex_lookup_module(proc_data, component, true); - if (NULL == module_data) { - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + if(NULL == name) { return OMPI_ERR_OUT_OF_RESOURCE; } - - /* register the callback */ - cb = OBJ_NEW(ompi_modex_cb_t); - cb->cbfunc = cbfunc; - cb->cbdata = cbdata; - opal_list_append(&module_data->module_cbs, &cb->super); - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - - return OMPI_SUCCESS; + + rc = orte_grpcomm.get_proc_attr(proc->proc_name, name, buffer, size); + free(name); + return rc; } - -static mca_base_component_t modex_component = { - MCA_BASE_VERSION_1_0_0, - "modex", - MCA_BASE_VERSION_1_0_0, - "", - MCA_BASE_VERSION_1_0_0, - NULL, - NULL -}; - - int ompi_modex_send_string(const char* key, const void *buffer, size_t size) { - int ret; - - OPAL_THREAD_LOCK(&ompi_modex_string_lock); - strncpy(modex_component.mca_component_name, key, - MCA_BASE_MAX_COMPONENT_NAME_LEN); - ret = ompi_modex_send(&modex_component, buffer, size); - OPAL_THREAD_UNLOCK(&ompi_modex_string_lock); - - return ret; + return orte_grpcomm.set_proc_attr(key, buffer, size); } @@ -735,13 +78,5 @@ ompi_modex_recv_string(const char* key, struct ompi_proc_t *source_proc, void **buffer, size_t *size) { - int ret; - - OPAL_THREAD_LOCK(&ompi_modex_string_lock); - strncpy(modex_component.mca_component_name, key, - MCA_BASE_MAX_COMPONENT_NAME_LEN); - ret = ompi_modex_recv(&modex_component, source_proc, buffer, size); - OPAL_THREAD_UNLOCK(&ompi_modex_string_lock); - - return ret; + 
return orte_grpcomm.get_proc_attr(source_proc->proc_name, key, buffer, size); } diff --git a/ompi/runtime/ompi_module_exchange.h b/ompi/runtime/ompi_module_exchange.h index a69006e121..021b0ef5a3 100644 --- a/ompi/runtime/ompi_module_exchange.h +++ b/ompi/runtime/ompi_module_exchange.h @@ -51,8 +51,7 @@ #include #endif -#include "orte/dss/dss_types.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" struct mca_base_component_t; struct ompi_proc_t; @@ -171,50 +170,6 @@ OMPI_DECLSPEC int ompi_modex_recv(struct mca_base_component_t *dest_component, void **buffer, size_t *size); -/** - * Non-blocking modex receive callback - * - * Prototype for non-blocking modex receive callback. - * - * @param[in] component Pointer to copy of the component struct - * @param[in] proc Peer process infromation is from - * @param[in] buffer Newly updated buffer - * @param[in] size Size (in bytes) of buffer - * @param[in] cbdata Callback data provided when non-blocking - * receive is posted - */ -typedef void (*ompi_modex_cb_fn_t)(struct mca_base_component_t *component, - struct ompi_proc_t* proc, - void* buffer, - size_t size, - void* cbdata); - - -/** - * Register to receive a callback on change to module specific data. - * - * The non-blocking version of ompi_modex_recv(). All information - * about ompi_modex_recv() applies to ompi_modex_recv_nb(), with the - * exception of what happens when data is available for the given peer - * process but not the specified module. In that case, no callback - * will be fired until data is available. 
- * - * @param[in] component A pointer to this module's component struct - * @param[in] proc Peer process to receive from - * @param[in] cbfunc Callback function when data is available, - * of type ompi_modex_cb_fn_t - * @param[in] cbdata Opaque callback data to pass to cbfunc - * - * @retval OMPI_SUCCESS Success - * @retval OMPI_ERR_OUT_OF_RESOURCE No memory could be allocated - * for internal data structures - */ -OMPI_DECLSPEC int ompi_modex_recv_nb(struct mca_base_component_t *component, - struct ompi_proc_t* proc, - ompi_modex_cb_fn_t cbfunc, - void* cbdata); - - /** * Receive a buffer from a given peer * @@ -248,66 +203,6 @@ OMPI_DECLSPEC int ompi_modex_recv_string(const char* key, void **buffer, size_t *size); -/** - * Retrieve a copy of the modex buffer - * - * Each component will "send" its data on its own. The modex - * collects that data into a local static buffer. At some point, - * we need to provide a copy of the collected info so someone - * (usually mpi_init) can send it to everyone else. This function - * xfers the payload in the local static buffer into the provided - * buffer, thus resetting the local buffer for future use. - * - * @note This function is probably not useful outside of application - * initialization code. - * - * @param[in] *buf Pointer to the target buffer - * - * @retval OMPI_SUCCESS Successfully exchanged information - * @retval OMPI_ERROR An unspecified error occurred - */ -OMPI_DECLSPEC int ompi_modex_get_my_buffer(orte_buffer_t *buf); - -/** - * Process the data in a modex buffer - * - * Given a buffer containing a set of modex entries, this - * function will destructively read the buffer, adding the - * modex info to each proc. 
An error will be returned if - * modex info is found for a proc that is not yet in the - * ompi_proc table - * - * @param[in] *buf Pointer to a buffer containing the data - * - * @retval OMPI_SUCCESS Successfully exchanged information - * @retval OMPI_ERROR An unspecified error occurred - */ -OMPI_DECLSPEC int ompi_modex_process_data(orte_buffer_t *buf); - - -/** - * Initialize the modex system - * - * Allocate memory for the local data cache and initialize the - * module exchange system. Does not cause communication nor any - * subscriptions to be placed on the registry. - * - * @retval OMPI_SUCCESS Successfully initialized modex subsystem - */ -OMPI_DECLSPEC int ompi_modex_init(void); - - -/** - * Finalize the modex system - * - * Release any memory associated with the modex system, remove all - * subscriptions on the GPR and end all non-blocking update triggers - * currently available on the system. - * - * @retval OMPI_SUCCESS Successfully shut down modex subsystem - */ -OMPI_DECLSPEC int ompi_modex_finalize(void); - END_C_DECLS #endif /* MCA_OMPI_MODULE_EXCHANGE_H */ diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index 0c2abee30b..21ee80b46c 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c @@ -35,8 +35,8 @@ #include "opal/mca/backtrace/backtrace.h" #include "orte/util/proc_info.h" #include "orte/runtime/runtime.h" -#include "orte/runtime/params.h" -#include "orte/mca/ns/ns.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/sys_info.h" #include "ompi/communicator/communicator.h" @@ -81,7 +81,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, if (!ompi_mpi_initialized || ompi_mpi_finalized) { if (orte_initialized) { - orte_errmgr.error_detected(errcode, NULL); + orte_errmgr.abort(errcode, NULL); } } @@ -148,16 +148,14 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, abort_procs = 
(orte_process_name_t*)malloc(sizeof(orte_process_name_t) * nabort_procs); if (NULL == abort_procs) { /* quick clean orte and get out */ - orte_errmgr.error_detected(errcode, - "Abort unable to malloc memory to kill procs", - NULL); + orte_errmgr.abort(errcode, "Abort unable to malloc memory to kill procs"); } /* put all the local procs in the abort list */ for (i = 0 ; i < ompi_comm_size(comm) ; ++i) { - if (ORTE_EQUAL != orte_ns.compare_fields(ORTE_NS_CMP_ALL, + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &comm->c_local_group->grp_proc_pointers[i]->proc_name, - orte_process_info.my_name)) { + ORTE_PROC_MY_NAME)) { assert(count <= nabort_procs); abort_procs[count++] = comm->c_local_group->grp_proc_pointers[i]->proc_name; } else { @@ -169,9 +167,9 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, /* if requested, kill off remote procs too */ if (kill_remote_of_intercomm) { for (i = 0 ; i < ompi_comm_remote_size(comm) ; ++i) { - if (ORTE_EQUAL != orte_ns.compare_fields(ORTE_NS_CMP_ALL, + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &comm->c_remote_group->grp_proc_pointers[i]->proc_name, - orte_process_info.my_name)) { + ORTE_PROC_MY_NAME)) { assert(count <= nabort_procs); abort_procs[count++] = comm->c_remote_group->grp_proc_pointers[i]->proc_name; @@ -183,16 +181,16 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, } if (nabort_procs > 0) { +#if 0 ret = orte_errmgr.abort_procs_request(abort_procs, nabort_procs); if (OMPI_SUCCESS != ret) { - orte_errmgr.error_detected(ret, - "Open MPI failed to abort procs as requested (%d). Exiting.", - ret, NULL); + orte_errmgr.abort(ret, "Open MPI failed to abort procs as requested (%d). Exiting.", ret); } +#endif } /* now that we've aborted everyone else, gracefully die. 
*/ - orte_errmgr.error_detected(errcode, NULL); + orte_errmgr.abort(errcode, NULL); return OMPI_SUCCESS; } diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index 2bdcf54c63..40a313a6b9 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -45,12 +45,8 @@ #include "opal/mca/carto/base/base.h" #include "orte/util/proc_info.h" -#include "orte/mca/schema/schema.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/runtime/runtime.h" @@ -66,7 +62,6 @@ #include "ompi/info/info.h" #include "ompi/runtime/mpiruntime.h" #include "ompi/attribute/attribute.h" -#include "ompi/runtime/ompi_module_exchange.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" #include "ompi/mca/osc/base/base.h" @@ -81,6 +76,8 @@ #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/runtime/params.h" #include "ompi/mca/mpool/base/mpool_base_tree.h" +#include "ompi/mca/dpm/base/base.h" +#include "ompi/mca/pubsub/base/base.h" #if OPAL_ENABLE_FT == 1 #include "ompi/mca/crcp/crcp.h" @@ -136,12 +133,6 @@ int ompi_mpi_finalize(void) MPI lifetime, to get better latency when not using TCP */ opal_progress_event_users_increment(); - /* mark that I called finalize before exiting */ - if (ORTE_SUCCESS != (ret = orte_smr.register_sync())) { - ORTE_ERROR_LOG(ret); - return ret; - } - /* If maffinity was setup, tear it down */ if (ompi_mpi_maffinity_setup) { opal_maffinity_base_close(); @@ -248,7 +239,17 @@ int ompi_mpi_finalize(void) if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) { return ret; } - + + /* finalize the pubsub functions */ + if ( OMPI_SUCCESS != (ret = ompi_pubsub_base_close())) { + return ret; + } + + /* finalize the DPM framework */ + if ( OMPI_SUCCESS != (ret = ompi_dpm_base_close())) { + return ret; + } + /* 
free internal error resources */ if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) { return ret; @@ -281,11 +282,6 @@ int ompi_mpi_finalize(void) return ret; } - /* free module exchange resources */ - if (OMPI_SUCCESS != (ret = ompi_modex_finalize())) { - return ret; - } - /* Close down MCA modules */ /* io is opened lazily, so it's only necessary to close it if it diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 038db8dec1..73e8da0a47 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -44,16 +44,12 @@ #include "orte/util/sys_info.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" +#include "orte/util/name_fns.h" #include "orte/runtime/runtime.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/gpr/gpr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/schema/schema.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/grpcomm.h" -#include "orte/runtime/params.h" +#include "orte/runtime/orte_globals.h" #include "ompi/constants.h" #include "ompi/mpi/f77/constants.h" @@ -84,6 +80,8 @@ #include "ompi/debuggers/debuggers.h" #include "ompi/proc/proc.h" #include "ompi/mca/pml/base/pml_base_bsend.h" +#include "ompi/mca/dpm/base/base.h" +#include "ompi/mca/pubsub/base/base.h" #if OPAL_ENABLE_FT == 1 #include "ompi/mca/crcp/crcp.h" @@ -228,7 +226,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_proc_t** procs; size_t nprocs; char *error = NULL; - orte_buffer_t mdx_buf, rbuf; bool timing = false; int param, value; struct timeval ompistart, ompistop; @@ -256,17 +253,11 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Setup ORTE stage 1, note that we are not infrastructre */ - if (ORTE_SUCCESS != (ret = orte_init(ORTE_NON_INFRASTRUCTURE))) { + if (ORTE_SUCCESS != (ret = orte_init(ORTE_NON_TOOL))) { error = "ompi_mpi_init: orte_init failed"; goto 
error; } - /* register myself to require that I finalize before exiting */ - if (ORTE_SUCCESS != (ret = orte_smr.register_sync())) { - error = "ompi_mpi_init: register sync failed"; - goto error; - } - /* check for timing request - get stop time and report elapsed time if so */ if (timing) { gettimeofday(&ompistop, NULL); @@ -303,7 +294,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } if (!set) { char *vpid; - orte_ns.get_vpid_string(&vpid, orte_process_info.my_name); + orte_util_convert_vpid_to_string(&vpid, ORTE_PROC_MY_NAME->vpid); opal_show_help("help-mpi-runtime", "mpi_init:startup:paffinity-unavailable", true, vpid); @@ -341,14 +332,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - /* Initialize module exchange - this MUST happen before proc_init - * as proc_init needs to send modex info! - */ - if (OMPI_SUCCESS != (ret = ompi_modex_init())) { - error = "ompi_modex_init() failed"; - goto error; - } - /* Initialize OMPI procs */ if (OMPI_SUCCESS != (ret = ompi_proc_init())) { error = "mca_proc_init() failed"; @@ -524,29 +507,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) gettimeofday(&ompistart, NULL); } - /* get the modex buffer so we can exchange it */ - OBJ_CONSTRUCT(&mdx_buf, orte_buffer_t); - if (OMPI_SUCCESS != (ret = ompi_modex_get_my_buffer(&mdx_buf))) { - error = "ompi_modex_execute() failed"; - goto error; - } - - /* execute the exchange - this function also acts as a barrier + /* exchange connection info - this function also acts as a barrier * as it will not return until the exchange is complete */ - OBJ_CONSTRUCT(&rbuf, orte_buffer_t); - if (OMPI_SUCCESS != (ret = orte_grpcomm.allgather(&mdx_buf, &rbuf))) { - error = "orte_gprcomm_allgather failed"; + if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(NULL))) { + error = "orte_grpcomm_modex failed"; goto error; } - OBJ_DESTRUCT(&mdx_buf); - - /* process the modex data into the proc structures */ - if 
(OMPI_SUCCESS != (ret = ompi_modex_process_data(&rbuf))) { - error = "ompi_modex_process_data failed"; - goto error; - } - OBJ_DESTRUCT(&rbuf); if (timing) { gettimeofday(&ompistop, NULL); @@ -680,10 +647,30 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } + /* Setup the publish/subscribe (PUBSUB) framework */ + if (OMPI_SUCCESS != (ret = ompi_pubsub_base_open())) { + error = "ompi_pubsub_base_open() failed"; + goto error; + } + if (OMPI_SUCCESS != (ret = ompi_pubsub_base_select())) { + error = "ompi_pubsub_base_select() failed"; + goto error; + } + + /* Setup the dynamic process management (DPM) framework */ + if (OMPI_SUCCESS != (ret = ompi_dpm_base_open())) { + error = "ompi_dpm_base_open() failed"; + goto error; + } + if (OMPI_SUCCESS != (ret = ompi_dpm_base_select())) { + error = "ompi_dpm_base_select() failed"; + goto error; + } + /* Check whether we have been spawned or not. We introduce that at the very end, since we need collectives, datatypes, ptls etc. up and running here.... */ - if (OMPI_SUCCESS != (ret = ompi_comm_dyn_init())) { + if (OMPI_SUCCESS != (ret = ompi_dpm.dyn_init())) { error = "ompi_comm_dyn_init() failed"; goto error; } @@ -773,7 +760,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) if (orte_debug_flag) { opal_output(0, "%s ompi_mpi_init completed", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } /* Do we need to wait for a TotalView-like debugger? 
*/ diff --git a/ompi/runtime/ompi_mpi_preconnect.c b/ompi/runtime/ompi_mpi_preconnect.c index ced88cf367..bfb02c770d 100644 --- a/ompi/runtime/ompi_mpi_preconnect.c +++ b/ompi/runtime/ompi_mpi_preconnect.c @@ -23,9 +23,12 @@ #include "ompi/communicator/communicator.h" #include "ompi/request/request.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/mca/dpm/dpm.h" + #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" + int ompi_init_preconnect_mpi(void) { @@ -140,7 +143,7 @@ ompi_init_preconnect_oob(void) ret = orte_rml.send(&procs[next]->proc_name, outmsg, 1, - ORTE_RML_TAG_WIREUP, + OMPI_RML_TAG_WIREUP, 0); if (ret < 0) return ret; } @@ -150,7 +153,7 @@ ompi_init_preconnect_oob(void) ret = orte_rml.recv(&procs[prev]->proc_name, inmsg, 1, - ORTE_RML_TAG_WIREUP, + OMPI_RML_TAG_WIREUP, 0); if (ret < 0) return ret; } diff --git a/ompi/tools/Makefile.am b/ompi/tools/Makefile.am index bea3b83803..a4bf813d7c 100644 --- a/ompi/tools/Makefile.am +++ b/ompi/tools/Makefile.am @@ -24,9 +24,11 @@ EXTRA_DIST += tools/win_makefile SUBDIRS += \ tools/ompi_info \ tools/wrappers \ - tools/ortetools + tools/ortetools \ + tools/ompi-server DIST_SUBDIRS += \ tools/ompi_info \ tools/wrappers \ - tools/ortetools + tools/ortetools \ + tools/ompi-server diff --git a/orte/tools/orteprobe/Makefile.am b/ompi/tools/ompi-server/Makefile.am similarity index 61% rename from orte/tools/orteprobe/Makefile.am rename to ompi/tools/ompi-server/Makefile.am index 16e289df4c..a43fb04cf9 100644 --- a/orte/tools/orteprobe/Makefile.am +++ b/ompi/tools/ompi-server/Makefile.am @@ -18,22 +18,23 @@ if OMPI_INSTALL_BINARIES -dist_pkgdata_DATA = help-orteprobe.txt +dist_pkgdata_DATA = help-ompi-server.txt -bin_PROGRAMS = orteprobe +bin_PROGRAMS = ompi-server endif libs = \ - $(top_builddir)/orte/libopen-rte.la + $(top_builddir)/ompi/libmpi.la \ + $(top_builddir)/opal/libopen-pal.la -orteprobe_SOURCES = \ - orteprobe.h \ - orteprobe.c +ompi_server_SOURCES = \ + ompi-server.c +ompi_server_LDADD 
= $(libs) +ompi_server_LDFLAGS = +ompi_server_DEPENDENCIES = $(libs) -orteprobe_LDADD = $(libs) -orteprobe_LDFLAGS = -orteprobe_DEPENDENCIES = $(libs) - -clean-local: - test -z "$(OMPI_CXX_TEMPLATE_REPOSITORY)" || $(RM) -rf $(OMPI_CXX_TEMPLATE_REPOSITORY) +# AM 1.9.6 seems to have a bug in its dependencies for install-man if +# dist_ and nodist_ are used, so explicitly add to EXTRA_DIST... +man_MANS = ompi-server.1 +EXTRA_DIST = $(man_MANS) diff --git a/orte/mca/gpr/replica/transition_layer/Makefile.am b/ompi/tools/ompi-server/help-ompi-server.txt similarity index 81% rename from orte/mca/gpr/replica/transition_layer/Makefile.am rename to ompi/tools/ompi-server/help-ompi-server.txt index 612dc9f9cb..4baa0241be 100644 --- a/orte/mca/gpr/replica/transition_layer/Makefile.am +++ b/ompi/tools/ompi-server/help-ompi-server.txt @@ -1,3 +1,4 @@ +# -*- text -*- # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology @@ -15,9 +16,8 @@ # # $HEADER$ # - -noinst_LTLIBRARIES = libmca_gpr_replica_tl.la -libmca_gpr_replica_tl_la_SOURCES = \ - gpr_replica_tl.h \ - gpr_replica_dict_tl.c \ - gpr_replica_segment_tl.c +# This is the US/English general help file for Open MPI's data server. +# +[ompiserver:usage] +Usage: %s [OPTION]... +Start an Open MPI data server. diff --git a/ompi/tools/ompi-server/ompi-server.1 b/ompi/tools/ompi-server/ompi-server.1 new file mode 100644 index 0000000000..32af06bb0d --- /dev/null +++ b/ompi/tools/ompi-server/ompi-server.1 @@ -0,0 +1,77 @@ +.\" +.\" Copyright (c) 2007 Los Alamos National Security, LLC +.\" All rights reserved. +.\" +.\" Man page for OMPI's ompi-server command +.\" +.\" .TH name section center-footer left-footer center-header +.TH OMPI-SERVER 1 "Dec 2007" "Open MPI" "OPEN MPI COMMANDS" +.\" ************************** +.\" Name Section +.\" ************************** +.SH NAME +. +ompi-server \- Server for supporting name publish/lookup operations. +. +.PP +. 
+.\" ************************** +.\" Synopsis Section +.\" ************************** +.SH SYNOPSIS +. +.B ompi-server +.R [ options ] +. +.\" ************************** +.\" Options Section +.\" ************************** +.SH Options +. +\fIompi-server\fR acts as a data server for Open MPI jobs to exchange +contact information in support of MPI-2's Publish_name and Lookup_name +functions. +. +.TP 10 +.B -h | --help +Display help for this command +. +. +.TP +.B -d | --debug +Enable verbose output for debugging +. +. +.TP +.B --debug-devel +Enable verbose debugging output from the Open RTE. +. +. +.TP +.B --report-uri \fR\fP +Report the Open MPI contact information for the server. This information is +required for MPI jobs to use the data server. Three parameter values are supported: +(a) '-', indicating that the uri is to be printed to stdout; (b) '+', indicating that +the uri is to be printed to stderr; and (c) "file:path-to-file", indicating that +the uri is to be printed to the specified file. The "path-to-file" can be either +absolute or relative, but must be in a location where the user has write +permissions. Please note that the resulting file must be read-accessible to +expected users of the server. +. +. +.\" ************************** +.\" Description Section +.\" ************************** +.SH DESCRIPTION +. +.PP +\fIompi-server\fR acts as a data server for Open MPI jobs to exchange +contact information in support of MPI-2's Publish_name and Lookup_name +functions. +. +.\" ************************** +.\" See Also Section +.\" ************************** +. +.SH SEE ALSO +. diff --git a/ompi/tools/ompi-server/ompi-server.c b/ompi/tools/ompi-server/ompi-server.c new file mode 100644 index 0000000000..13bc9830f7 --- /dev/null +++ b/ompi/tools/ompi-server/ompi-server.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * Copyright (c) 2007 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_NETDB_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#include +#include +#include + + +#include "opal/event/event.h" +#include "opal/mca/base/base.h" +#include "opal/util/cmd_line.h" +#include "opal/util/output.h" +#include "opal/util/printf.h" +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/util/daemon_init.h" +#include "opal/runtime/opal.h" +#include "opal/mca/base/mca_base_param.h" + + +#include "orte/util/name_fns.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_data_server.h" + +/* + * Globals + */ + +static opal_event_t term_handler; +static opal_event_t int_handler; + +static void shutdown_callback(int fd, short flags, void *arg); + +static bool help=false; +static bool debug=false; +static bool no_daemonize=false; +static char *report_uri=NULL; + +/* + * define the context table for obtaining parameters + */ +opal_cmd_line_init_t ompi_server_cmd_line_opts[] = { + /* Various "obvious" options */ + { NULL, NULL, NULL, 'h', NULL, "help", 0, + &help, OPAL_CMD_LINE_TYPE_BOOL, + "This help message" }, + + { NULL, NULL, NULL, 'd', NULL, "debug", 0, + &debug, OPAL_CMD_LINE_TYPE_BOOL, + "Debug the 
Open MPI server" }, + + { "orte", "debug", NULL, '\0', NULL, "debug-devel", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Debug the OpenRTE" }, + + { "orte", "no_daemonize", NULL, '\0', NULL, "no-daemonize", 0, + &no_daemonize, OPAL_CMD_LINE_TYPE_BOOL, + "Don't daemonize into the background" }, + + { NULL, NULL, NULL, '\0', NULL, "report-uri", 1, + &report_uri, OPAL_CMD_LINE_TYPE_STRING, + "Report the server's uri on stdout"}, + + /* End of list */ + { NULL, NULL, NULL, '\0', NULL, NULL, 0, + NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } +}; + +int main(int argc, char *argv[]) +{ + int ret = 0; + opal_cmd_line_t *cmd_line = NULL; + char *rml_uri; + + /* init enough of opal to process cmd lines */ + if (OPAL_SUCCESS != opal_init_util()) { + fprintf(stderr, "OPAL failed to initialize -- orted aborting\n"); + exit(1); + } + + /* setup to check common command line options that just report and die */ + cmd_line = OBJ_NEW(opal_cmd_line_t); + opal_cmd_line_create(cmd_line, ompi_server_cmd_line_opts); + mca_base_cmd_line_setup(cmd_line); + if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false, + argc, argv))) { + char *args = NULL; + args = opal_cmd_line_get_usage_msg(cmd_line); + opal_show_help("help-ompi-server.txt", "ompiserver:usage", false, + argv[0], args); + free(args); + return ret; + } + + /* check for help request */ + if (help) { + char *args = NULL; + args = opal_cmd_line_get_usage_msg(cmd_line); + opal_show_help("help-ompi-server.txt", "ompiserver:usage", false, + argv[0], args); + free(args); + return 1; + } + + /* + * Since this process can now handle MCA/GMCA parameters, make sure to + * process them. 
+ */ + mca_base_cmd_line_process_args(cmd_line, &environ, &environ); + + /* register and process the orte params */ + if (ORTE_SUCCESS != (ret = orte_register_params())) { + return ret; + } + + /* detach from controlling terminal + * otherwise, remain attached so output can get to us + */ + if(orte_debug_flag == false && + debug == false && + no_daemonize == false) { + opal_daemon_init(NULL); + } + +#if OPAL_ENABLE_FT == 1 + /* Disable the checkpoint notification routine for this + * tool. As we will never need to checkpoint this tool. + * Note: This must happen before opal_init(). + */ + opal_cr_set_enabled(false); + + /* Select the none component, since we don't actually use a checkpointer */ + opal_setenv(mca_base_param_env_var("crs"), + "none", + true, &environ); + /* Mark as a tool program */ + opal_setenv(mca_base_param_env_var("opal_cr_is_tool"), + "1", + true, &environ); +#endif + + /* Perform the standard init, but flag that we are a tool + * so that we only open up the communications infrastructure. No + * session directories will be created. 
+ */ + if (ORTE_SUCCESS != (ret = orte_init(ORTE_TOOL))) { + fprintf(stderr, "ompi-server: failed to initialize -- aborting\n"); + exit(1); + } + + /* report out our URI, if we were requested to do so, using syntax + * proposed in an email thread by Jeff Squyres + */ + if (NULL != report_uri) { + rml_uri = orte_rml.get_contact_info(); + if (0 == strcmp(report_uri, "-")) { + /* if '-', then output to stdout */ + printf("%s\n", rml_uri); + } else if (0 == strcmp(report_uri, "+")) { + /* if '+', output to stderr */ + fprintf(stderr, "%s\n", rml_uri); + } else { + /* treat it as a filename and output into it */ + FILE *fp; + fp = fopen(report_uri, "w"); + if (NULL == fp) { + fprintf(stderr, "ompi-server: failed to open designated file -- aborting\n"); + orte_finalize(); + exit(1); + } + fprintf(fp, "%s\n", rml_uri); + fclose(fp); + } + free(rml_uri); + } + + /* setup the data server to listen for commands */ + if (ORTE_SUCCESS != (ret = orte_data_server_init())) { + fprintf(stderr, "ompi-server: failed to start data server -- aborting\n"); + orte_finalize(); + exit(1); + } + + /* Set signal handlers to catch kill signals so we can properly clean up + * after ourselves. + */ + opal_event_set(&term_handler, SIGTERM, OPAL_EV_SIGNAL, + shutdown_callback, NULL); + opal_event_add(&term_handler, NULL); + opal_event_set(&int_handler, SIGINT, OPAL_EV_SIGNAL, + shutdown_callback, NULL); + opal_event_add(&int_handler, NULL); + + /* We actually do *not* want the server to voluntarily yield() the + processor more than necessary. The server already blocks when + it is doing nothing, so it doesn't use any more CPU cycles than + it should; but when it *is* doing something, we do not want it + to be unnecessarily delayed because it voluntarily yielded the + processor in the middle of its work. 
+ + For example: when a message arrives at the server, we want the + OS to wake up the server in a timely fashion (which most OS's + seem good about doing) and then we want the server to process + the message as fast as possible. If the server yields and lets + aggressive MPI applications get the processor back, it may be a + long time before the OS schedules the server to run again + (particularly if there is no IO event to wake it up). Hence, + publish and lookup (for example) may be significantly delayed + before being delivered to MPI processes, which can be + problematic in some scenarios (e.g., COMM_SPAWN). */ + opal_progress_set_yield_when_idle(false); + + /* Change the default behavior of libevent such that we want to + continually block rather than blocking for the default timeout + and then looping around the progress engine again. There + should be nothing in the server that cannot block in libevent + until "something" happens (i.e., there's no need to keep + cycling through progress because the only things that should + happen will happen in libevent). This is a minor optimization, + but what the heck... :-) */ + opal_progress_set_event_flag(OPAL_EVLOOP_ONCE); + + if (debug) { + opal_output(0, "%s ompi-server: up and running!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + } + + /* wait to hear we are done */ + opal_event_dispatch(); + + /* should never get here, but if we do... 
*/ + + /* Finalize and clean up ourselves */ + if (ORTE_SUCCESS != (ret = orte_finalize())) { + ORTE_ERROR_LOG(ret); + } + return ret; +} + +static void shutdown_callback(int fd, short flags, void *arg) +{ + int ret; + + if (debug) { + opal_output(0, "%s ompi-server: finalizing", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + } + + /* Finalize and clean up ourselves */ + if (ORTE_SUCCESS != (ret = orte_finalize())) { + ORTE_ERROR_LOG(ret); + } + exit(ret); +} diff --git a/ompi/tools/ompi_info/components.cc b/ompi/tools/ompi_info/components.cc index 0b86e930c4..17ae8ffc9b 100644 --- a/ompi/tools/ompi_info/components.cc +++ b/ompi/tools/ompi_info/components.cc @@ -52,6 +52,7 @@ #include "opal/mca/crs/base/base.h" #endif #include "opal/runtime/opal.h" +#include "opal/dss/dss.h" #include "ompi/mca/allocator/allocator.h" #include "ompi/mca/allocator/base/base.h" @@ -75,6 +76,9 @@ #include "ompi/mca/topo/base/base.h" #include "ompi/mca/osc/osc.h" #include "ompi/mca/osc/base/base.h" +#include "ompi/mca/pubsub/base/base.h" +#include "ompi/mca/dpm/base/base.h" + #if OPAL_ENABLE_FT == 1 #include "ompi/mca/crcp/crcp.h" #include "ompi/mca/crcp/base/base.h" @@ -82,38 +86,26 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/base/base.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/gpr/base/base.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/iof/iof.h" #include "orte/mca/iof/base/base.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/oob.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/odls/odls.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/ras/ras.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/rds/rds.h" -#include "orte/mca/rds/base/base.h" +#include "orte/mca/ras/base/ras_private.h" #include "orte/mca/rmaps/rmaps.h" #include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/rmgr/base/base.h" #include 
"orte/mca/rml/rml.h" #include "orte/mca/rml/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/mca/routed/base/base.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/base/base.h" -#include "orte/mca/odls/odls.h" -#include "orte/mca/odls/base/base.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/smr/base/base.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/base/base.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/plm/base/base.h" +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" #if OPAL_ENABLE_FT == 1 #include "orte/mca/snapc/snapc.h" #include "orte/mca/snapc/base/base.h" @@ -176,9 +168,9 @@ void ompi_info::open_components() // it. opal_event_init(); - // Open the DPS + // Open the DSS - if (ORTE_SUCCESS != orte_dss_open()) { + if (ORTE_SUCCESS != opal_dss_open()) { printf( "Unable to initialize the DSS\n" ); return; } @@ -193,7 +185,7 @@ void ompi_info::open_components() // Register the ORTE layer's MCA parameters - orte_register_params(false); + orte_register_params(); // Register the MPI layer's MCA parameters @@ -236,9 +228,9 @@ void ompi_info::open_components() component_map["installdirs"] = &opal_installdirs_components; // ORTE frameworks - // Set orte_process_info.seed to true to force all frameworks to + // Set orte_process_info.hnp to true to force all frameworks to // open components - orte_process_info.seed = true; + orte_process_info.hnp = true; mca_oob_base_open(); component_map["oob"] = &mca_oob_base_components; @@ -249,29 +241,17 @@ void ompi_info::open_components() orte_errmgr_base_open(); component_map["errmgr"] = &orte_errmgr_base_components_available; - orte_gpr_base_open(); - component_map["gpr"] = &orte_gpr_base_components_available; - orte_grpcomm_base_open(); component_map["grpcomm"] = &mca_grpcomm_base_components_available; orte_iof_base_open(); component_map["iof"] = &orte_iof_base.iof_components_opened; - orte_ns_base_open(); - component_map["ns"] = 
&mca_ns_base_components_available; - orte_ras_base_open(); component_map["ras"] = &orte_ras_base.ras_opened; - orte_rds_base_open(); - component_map["rds"] = &orte_rds_base.rds_components; - orte_rmaps_base_open(); - component_map["rmaps"] = &orte_rmaps_base.rmaps_opened; - - orte_rmgr_base_open(); - component_map["rmgr"] = &orte_rmgr_base.rmgr_components; + component_map["rmaps"] = &orte_rmaps_base.available_components; orte_rml_base_open(); component_map["rml"] = &orte_rml_base_components; @@ -279,17 +259,11 @@ void ompi_info::open_components() orte_routed_base_open(); component_map["routed"] = &orte_routed_base_components; - orte_pls_base_open(); - component_map["pls"] = &orte_pls_base.available_components; + orte_plm_base_open(); + component_map["plm"] = &orte_plm_base.available_components; - orte_odls_base_open(); - component_map["odls"] = &orte_odls_base.available_components; - - orte_sds_base_open(); - component_map["sds"] = &orte_sds_base_components_available; - - orte_smr_base_open(); - component_map["smr"] = &orte_smr_base.smr_components; + orte_ess_base_open(); + component_map["ess"] = &orte_ess_base_components_available; #if OPAL_ENABLE_FT == 1 orte_snapc_base_open(); @@ -335,6 +309,12 @@ void ompi_info::open_components() mca_topo_base_open(); component_map["topo"] = &mca_topo_base_components_opened; + ompi_pubsub_base_open(); + component_map["pubsub"] = &ompi_pubsub_base_components_available; + + ompi_dpm_base_open(); + component_map["dpm"] = &ompi_dpm_base_components_available; + #if OPAL_ENABLE_FT == 1 ompi_crcp_base_open(); component_map["crcp"] = &ompi_crcp_base_components_available; @@ -369,6 +349,8 @@ void ompi_info::close_components() #if OPAL_ENABLE_FT == 1 ompi_crcp_base_close(); #endif + ompi_dpm_base_close(); + ompi_pubsub_base_close(); mca_topo_base_close(); // the PML has to call the base PTL close function. 
mca_btl_base_close(); @@ -386,16 +368,11 @@ void ompi_info::close_components() #endif orte_filem_base_close(); orte_iof_base_close(); - orte_sds_base_close(); - orte_smr_base_close(); - orte_pls_base_close(); + orte_ess_base_close(); + orte_plm_base_close(); orte_odls_base_close(); - orte_rmgr_base_close(); orte_rmaps_base_close(); - orte_rds_base_close(); orte_ras_base_close(); - orte_ns_base_close(); - orte_gpr_base_close(); orte_grpcomm_base_close(); orte_errmgr_base_close(); orte_rml_base_close(); diff --git a/ompi/tools/ompi_info/ompi_info.cc b/ompi/tools/ompi_info/ompi_info.cc index 8154f8ec5a..2741a821d6 100644 --- a/ompi/tools/ompi_info/ompi_info.cc +++ b/ompi/tools/ompi_info/ompi_info.cc @@ -191,6 +191,8 @@ int main(int argc, char *argv[]) #if OPAL_ENABLE_FT == 1 ompi_info::mca_types.push_back("crs"); #endif + ompi_info::mca_types.push_back("dpm"); + ompi_info::mca_types.push_back("pubsub"); ompi_info::mca_types.push_back("allocator"); ompi_info::mca_types.push_back("coll"); @@ -209,21 +211,17 @@ int main(int argc, char *argv[]) #endif ompi_info::mca_types.push_back("errmgr"); - ompi_info::mca_types.push_back("gpr"); ompi_info::mca_types.push_back("grpcomm"); ompi_info::mca_types.push_back("iof"); - ompi_info::mca_types.push_back("ns"); ompi_info::mca_types.push_back("oob"); ompi_info::mca_types.push_back("odls"); + ompi_info::mca_types.push_back("ess"); ompi_info::mca_types.push_back("ras"); - ompi_info::mca_types.push_back("rds"); ompi_info::mca_types.push_back("rmaps"); - ompi_info::mca_types.push_back("rmgr"); ompi_info::mca_types.push_back("rml"); ompi_info::mca_types.push_back("routed"); - ompi_info::mca_types.push_back("pls"); + ompi_info::mca_types.push_back("plm"); ompi_info::mca_types.push_back("sds"); - ompi_info::mca_types.push_back("soh"); #if OPAL_ENABLE_FT == 1 ompi_info::mca_types.push_back("snapc"); #endif diff --git a/opal/Makefile.am b/opal/Makefile.am index 71187f2819..38b87f70aa 100644 --- a/opal/Makefile.am +++ b/opal/Makefile.am @@ 
-84,3 +84,4 @@ include threads/Makefile.am include mca/Makefile.am include win32/Makefile.am include tools/Makefile.am +include dss/Makefile.am diff --git a/orte/dss/Makefile.am b/opal/dss/Makefile.am similarity index 94% rename from orte/dss/Makefile.am rename to opal/dss/Makefile.am index 69e9c4aa4b..01fefd33bf 100644 --- a/orte/dss/Makefile.am +++ b/opal/dss/Makefile.am @@ -17,15 +17,14 @@ # $HEADER$ # -# This makefile.am does not stand on its own - it is included from orte/Makefile.am +# This makefile.am does not stand on its own - it is included from opal/Makefile.am headers += \ dss/dss.h \ dss/dss_types.h \ dss/dss_internal.h -noinst_LTLIBRARIES += dss/libdss.la -dss_libdss_la_SOURCES = \ +libopen_pal_la_SOURCES += \ dss/dss_internal_functions.c \ dss/dss_arith.c \ dss/dss_compare.c \ diff --git a/orte/dss/dss.h b/opal/dss/dss.h similarity index 70% rename from orte/dss/dss.h rename to opal/dss/dss.h index 47e582310a..16d18b6786 100644 --- a/orte/dss/dss.h +++ b/opal/dss/dss.h @@ -22,19 +22,16 @@ * Data packing subsystem. */ -#ifndef ORTE_DSS_H_ -#define ORTE_DSS_H_ +#ifndef OPAL_DSS_H_ +#define OPAL_DSS_H_ -#include "orte_config.h" +#include "opal_config.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" +#include "opal/types.h" -#include "orte/dss/dss_types.h" +#include "opal/dss/dss_types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /** * Set the buffer type. 
@@ -51,11 +48,11 @@ extern "C" { * * @param type The new buffer type * - * @retval ORTE_SUCCESS Operation successfully executed + * @retval OPAL_SUCCESS Operation successfully executed * - * @retval ORTE_ERROR_VALUE An appropriate error code + * @retval OPAL_ERROR_VALUE An appropriate error code */ -typedef int (*orte_dss_set_buffer_type_fn_t)(orte_buffer_t *buffer, orte_dss_buffer_type_t type); +typedef int (*opal_dss_set_buffer_type_fn_t)(opal_buffer_t *buffer, opal_dss_buffer_type_t type); /** * Top-level itnerface function to pack one or more values into a @@ -95,22 +92,22 @@ typedef int (*orte_dss_set_buffer_type_fn_t)(orte_buffer_t *buffer, orte_dss_buf * @param type The type of the data to be packed - must be one of the * DSS defined data types. * - * @retval ORTE_SUCCESS The data was packed as requested. + * @retval OPAL_SUCCESS The data was packed as requested. * - * @retval ORTE_ERROR(s) An appropriate ORTE error code indicating the + * @retval OPAL_ERROR(s) An appropriate OPAL error code indicating the * problem encountered. This error code should be handled * appropriately. * * @code - * orte_buffer_t *buffer; + * opal_buffer_t *buffer; * int32_t src; * - * status_code = orte_dss.pack(buffer, &src, 1, ORTE_INT32); + * status_code = opal_dss.pack(buffer, &src, 1, OPAL_INT32); * @endcode */ -typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_values, - orte_data_type_t type); +typedef int (*opal_dss_pack_fn_t)(opal_buffer_t *buffer, const void *src, + int32_t num_values, + opal_data_type_t type); /** * Unpack values from a buffer. @@ -141,7 +138,7 @@ typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, * same buffer. * * Warning: The caller is responsible for providing adequate memory - * storage for the requested data. The orte_dss_peek() function is + * storage for the requested data. The opal_dss_peek() function is * provided to assist in meeting this requirement. 
As noted below, the user * must provide a parameter indicating the maximum number of values that * can be unpacked into the allocated memory. If more values exist in the @@ -169,7 +166,7 @@ typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, * string in the array - the caller must only provide adequate memory * for the array of pointers. * - * @param *num A pointer to a orte_std_cntr_t value indicating the maximum + * @param *num A pointer to a int32_t value indicating the maximum * number of values that are to be unpacked, beginning at the location * pointed to by src. This is provided to help protect the caller from * memory overrun. Note that a string @@ -188,10 +185,10 @@ typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, * available, the buffer will be in an unpackable state - the dss will * return an error code to warn of this condition. * - * @retval ORTE_SUCCESS The next item in the buffer was successfully + * @retval OPAL_SUCCESS The next item in the buffer was successfully * unpacked. * - * @retval ORTE_ERROR(s) The unpack function returns an error code + * @retval OPAL_ERROR(s) The unpack function returns an error code * under one of several conditions: (a) the number of values in the * item exceeds the max num provided by the caller; (b) the type of * the next item in the buffer does not match the type specified by @@ -199,23 +196,23 @@ typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, * buffer or an attempt to read past the end of the buffer. 
* * @code - * orte_buffer_t *buffer; + * opal_buffer_t *buffer; * int32_t dest; * char **string_array; - * orte_std_cntr_t num_values; + * int32_t num_values; * * num_values = 1; - * status_code = orte_dss.unpack(buffer, (void*)&dest, &num_values, ORTE_INT32); + * status_code = opal_dss.unpack(buffer, (void*)&dest, &num_values, OPAL_INT32); * * num_values = 5; * string_array = malloc(num_values*sizeof(char *)); - * status_code = orte_dss.unpack(buffer, (void*)(string_array), &num_values, ORTE_STRING); + * status_code = opal_dss.unpack(buffer, (void*)(string_array), &num_values, OPAL_STRING); * * @endcode */ -typedef int (*orte_dss_unpack_fn_t)(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *max_num_values, - orte_data_type_t type); +typedef int (*opal_dss_unpack_fn_t)(opal_buffer_t *buffer, void *dest, + int32_t *max_num_values, + opal_data_type_t type); /** * Get the type and number of values of the next item in the buffer. @@ -227,23 +224,23 @@ typedef int (*orte_dss_unpack_fn_t)(orte_buffer_t *buffer, void *dest, * * @param buffer A pointer to the buffer in question. * - * @param type A pointer to an orte_data_type_t variable where the + * @param type A pointer to an opal_data_type_t variable where the * type of the next item in the buffer is to be stored. Caller must * have memory backing this location. * - * @param number A pointer to a orte_std_cntr_t variable where the number of + * @param number A pointer to a int32_t variable where the number of * data values in the next item is to be stored. Caller must have * memory backing this location. * - * @retval ORTE_SUCCESS Requested info was successfully returned. - * @retval ORTE_ERROR(s) An appropriate error code indicating the + * @retval OPAL_SUCCESS Requested info was successfully returned. + * @retval OPAL_ERROR(s) An appropriate error code indicating the * problem will be returned. This should be handled appropriately by * the caller. 
* */ -typedef int (*orte_dss_peek_next_item_fn_t)(orte_buffer_t *buffer, - orte_data_type_t *type, - orte_std_cntr_t *number); +typedef int (*opal_dss_peek_next_item_fn_t)(opal_buffer_t *buffer, + opal_data_type_t *type, + int32_t *number); /** * Unload the data payload from a buffer. @@ -266,24 +263,24 @@ typedef int (*orte_dss_peek_next_item_fn_t)(orte_buffer_t *buffer, * * @param size The size (in bytes) of the data payload in the buffer. * - * @retval ORTE_SUCCESS The request was succesfully completed. + * @retval OPAL_SUCCESS The request was successfully completed. * - * @retval ORTE_ERROR(s) An appropriate error code indicating the + * @retval OPAL_ERROR(s) An appropriate error code indicating the * problem will be returned. This should be handled appropriately by * the caller.
* * @code - * orte_buffer_t *buffer; + * opal_buffer_t *buffer; * uint8_t bytes; - * orte_std_cntr_t size; + * int32_t size; * - * buffer = OBJ_NEW(orte_buffer_t); - * status_code = orte_dss.load(buffer, (void*)(&bytes), size); + * buffer = OBJ_NEW(opal_buffer_t); + * status_code = opal_dss.load(buffer, (void*)(&bytes), size); * @endcode */ -typedef int (*orte_dss_load_fn_t)(orte_buffer_t *buffer, +typedef int (*opal_dss_load_fn_t)(opal_buffer_t *buffer, void *payload, - orte_std_cntr_t size); + int32_t size); /** @@ -336,8 +333,8 @@ typedef int (*orte_dss_load_fn_t)(orte_buffer_t *buffer, * payload from one buffer and loads it into another. This is a destructive * action - see the unload and load descriptions above. */ -typedef int (*orte_dss_xfer_payload_fn_t)(orte_buffer_t *dest, - orte_buffer_t *src); +typedef int (*opal_dss_xfer_payload_fn_t)(opal_buffer_t *dest, + opal_buffer_t *src); /** * Copy a payload from one buffer to another @@ -350,23 +347,23 @@ typedef int (*orte_dss_xfer_payload_fn_t)(orte_buffer_t *dest, * source buffer's payload will remain intact, as will any pre-existing * payload in the destination's buffer. */ -typedef int (*orte_dss_copy_payload_fn_t)(orte_buffer_t *dest, - orte_buffer_t *src); +typedef int (*opal_dss_copy_payload_fn_t)(opal_buffer_t *dest, + opal_buffer_t *src); /** * DSS initialization function. * * In dynamic libraries, declared objects and functions don't get - * loaded until called. We need to ensure that the orte_dss function + * loaded until called. We need to ensure that the opal_dss function * structure gets loaded, so we provide an "open" call that is * executed as part of the program startup. 
*/ -ORTE_DECLSPEC int orte_dss_open(void); +OPAL_DECLSPEC int opal_dss_open(void); /** * DSS finalize function */ -ORTE_DECLSPEC int orte_dss_close(void); +OPAL_DECLSPEC int opal_dss_close(void); /** @@ -387,12 +384,12 @@ ORTE_DECLSPEC int orte_dss_close(void); * @param type The type of the data to be copied - must be one of * the DSS defined data types. * - * @retval ORTE_SUCCESS The value was successfully copied. + * @retval OPAL_SUCCESS The value was successfully copied. * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. * */ -typedef int (*orte_dss_copy_fn_t)(void **dest, void *src, orte_data_type_t type); +typedef int (*opal_dss_copy_fn_t)(void **dest, void *src, opal_data_type_t type); /** * Compare two data values. @@ -407,8 +404,8 @@ typedef int (*orte_dss_copy_fn_t)(void **dest, void *src, orte_data_type_t type) * @retval 0 Indicates two values are equal * @retval +1 Indicates second value is greater than first value */ -typedef int (*orte_dss_compare_fn_t)(void *value1, void *value2, - orte_data_type_t type); +typedef int (*opal_dss_compare_fn_t)(void *value1, void *value2, + opal_data_type_t type); /** @@ -429,11 +426,11 @@ typedef int (*orte_dss_compare_fn_t)(void *value1, void *value2, * @param type The type of the data value - must be one of * the DSS defined data types or an error will be returned. * - * @retval ORTE_SUCCESS The value was successfully copied. + * @retval OPAL_SUCCESS The value was successfully copied. * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_size_fn_t)(size_t *size, void *src, orte_data_type_t type); +typedef int (*opal_dss_size_fn_t)(size_t *size, void *src, opal_data_type_t type); /** @@ -443,11 +440,11 @@ typedef int (*orte_dss_size_fn_t)(size_t *size, void *src, orte_data_type_t type * needs some way to know how to print them (i.e., convert them to a string * representation). 
* - * @retval ORTE_SUCCESS The value was successfully printed. + * @retval OPAL_SUCCESS The value was successfully printed. * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_print_fn_t)(char **output, char *prefix, void *src, orte_data_type_t type); +typedef int (*opal_dss_print_fn_t)(char **output, char *prefix, void *src, opal_data_type_t type); /** @@ -456,11 +453,11 @@ typedef int (*orte_dss_print_fn_t)(char **output, char *prefix, void *src, orte_ * Uses the dss.print command to obtain a string version of the data value * and prints it to the designated output stream. * - * @retval ORTE_SUCCESS The value was successfully printed. + * @retval OPAL_SUCCESS The value was successfully printed. * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_dump_fn_t)(int output_stream, void *src, orte_data_type_t type); +typedef int (*opal_dss_dump_fn_t)(int output_stream, void *src, opal_data_type_t type); /** * Set a data value @@ -473,11 +470,11 @@ typedef int (*orte_dss_dump_fn_t)(int output_stream, void *src, orte_data_type_t * and type to the specified location and type. Use "copy" if you want dynamic allocation * of storage. * - * @retval ORTE_SUCCESS The value was successfully stored + * @retval OPAL_SUCCESS The value was successfully stored * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_set_fn_t)(orte_data_value_t *value, void *new_value, orte_data_type_t type); +typedef int (*opal_dss_set_fn_t)(opal_dss_value_t *value, void *new_value, opal_data_type_t type); /** * Get a data value @@ -490,12 +487,12 @@ typedef int (*orte_dss_set_fn_t)(orte_data_value_t *value, void *new_value, orte * to that of the value, after ensuring that the value's type matches the specified one. 
* Use "copy" if you want dynamic allocation of memory. * - * @retval ORTE_SUCCESS The value was successfully retrieved + * @retval OPAL_SUCCESS The value was successfully retrieved * - * @retval ORTE_ERROR(s) An appropriate error code - usually caused by the specified type + * @retval OPAL_ERROR(s) An appropriate error code - usually caused by the specified type * not matching the data type within the stored object. */ -typedef int (*orte_dss_get_fn_t)(void **data, orte_data_value_t *value, orte_data_type_t type); +typedef int (*opal_dss_get_fn_t)(void **data, opal_dss_value_t *value, opal_data_type_t type); /** * Perform an arithemetic operation on a data value @@ -504,12 +501,12 @@ typedef int (*orte_dss_get_fn_t)(void **data, orte_data_value_t *value, orte_dat * a function by which it can manipulate the data value within the data_value object. This * is the equivalent to a C++ access function. * - * @retval ORTE_SUCCESS The value was successfully retrieved + * @retval OPAL_SUCCESS The value was successfully retrieved * - * @retval ORTE_ERROR(s) An appropriate error code - usually caused by the specified type + * @retval OPAL_ERROR(s) An appropriate error code - usually caused by the specified type * not matching the data type within the stored object. */ -typedef int (*orte_dss_arith_fn_t)(orte_data_value_t *value, orte_data_value_t *operand, orte_dss_arith_op_t operation); +typedef int (*opal_dss_arith_fn_t)(opal_dss_value_t *value, opal_dss_value_t *operand, opal_dss_arith_op_t operation); /** * Increment a data value @@ -518,11 +515,11 @@ typedef int (*orte_dss_arith_fn_t)(orte_data_value_t *value, orte_data_value_t * * a function by which it can manipulate the data value within the data_value object. This * is the equivalent to a C++ access function. * - * @retval ORTE_SUCCESS The value was successfully retrieved + * @retval OPAL_SUCCESS The value was successfully retrieved * - * @retval ORTE_ERROR(s) An appropriate error code. 
+ * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_increment_fn_t)(orte_data_value_t *value); +typedef int (*opal_dss_increment_fn_t)(opal_dss_value_t *value); /** * Decrement a data value @@ -531,11 +528,11 @@ typedef int (*orte_dss_increment_fn_t)(orte_data_value_t *value); * a function by which it can manipulate the data value within the data_value object. This * is the equivalent to a C++ access function. * - * @retval ORTE_SUCCESS The value was successfully retrieved + * @retval OPAL_SUCCESS The value was successfully retrieved * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_decrement_fn_t)(orte_data_value_t *value); +typedef int (*opal_dss_decrement_fn_t)(opal_dss_value_t *value); /** * Release the storage used by a data value @@ -544,21 +541,21 @@ typedef int (*orte_dss_decrement_fn_t)(orte_data_value_t *value); * a function by which it can release the storage associated with a value * stored in a data value object. */ -typedef void (*orte_dss_release_fn_t)(orte_data_value_t *value); +typedef void (*opal_dss_release_fn_t)(opal_dss_value_t *value); /** * Register a set of data handling functions. * * * This function registers a set of data type functions for a specific * type. An integer is returned that should be used a an argument to - * future invocations of orte_dss.pack(), orte_dss.unpack(), orte_dss.copy(), - * and orte_dss.compare, which + * future invocations of opal_dss.pack(), opal_dss.unpack(), opal_dss.copy(), + * and opal_dss.compare, which * will trigger calls to the appropriate functions. 
This * is most useful when extending the datatypes that the dss can - * handle; pack and unpack functions can nest calls to orte_dss.pack() - * / orte_dss.unpack(), so defining small pack/unpack functions can be + * handle; pack and unpack functions can nest calls to opal_dss.pack() + * / opal_dss.unpack(), so defining small pack/unpack functions can be * used recursively to build larger types (e.g., packing/unpacking - * structs can use calls to orte_dss.pack()/unpack() to serialize / + * structs can use calls to opal_dss.pack()/unpack() to serialize / * deserialize individual members). This is likewise true for the copy * and compare functions. * @@ -575,28 +572,38 @@ typedef void (*orte_dss_release_fn_t)(orte_data_value_t *value); * @param name [IN] String name for this pair (mainly for debugging) * @param type [OUT] Type number for this registration * - * @returns ORTE_SUCCESS upon success + * @returns OPAL_SUCCESS upon success * */ -typedef int (*orte_dss_register_fn_t)(orte_dss_pack_fn_t pack_fn, - orte_dss_unpack_fn_t unpack_fn, - orte_dss_copy_fn_t copy_fn, - orte_dss_compare_fn_t compare_fn, - orte_dss_size_fn_t size_fn, - orte_dss_print_fn_t print_fn, - orte_dss_release_fn_t release_fn, +typedef int (*opal_dss_register_fn_t)(opal_dss_pack_fn_t pack_fn, + opal_dss_unpack_fn_t unpack_fn, + opal_dss_copy_fn_t copy_fn, + opal_dss_compare_fn_t compare_fn, + opal_dss_size_fn_t size_fn, + opal_dss_print_fn_t print_fn, + opal_dss_release_fn_t release_fn, bool structured, - const char *name, orte_data_type_t *type); + const char *name, opal_data_type_t *type); /* * This function looks up the string name corresponding to the identified * data type - used for debugging messages. 
*/ -typedef char* (*orte_dss_lookup_data_type_fn_t)(orte_data_type_t type); +typedef char* (*opal_dss_lookup_data_type_fn_t)(opal_data_type_t type); /* * Dump the data type list - used for debugging to see what has been registered */ -typedef void (*orte_dss_dump_data_types_fn_t)(int output); +typedef void (*opal_dss_dump_data_types_fn_t)(int output); + +/* utility functions for specialized packing and unpacking. + * These are useful for creating pack/unpack functions for user + * defined types. These are NOT for general purpose use */ +typedef int (*opal_dss_pack_buffer_fn_t)(opal_buffer_t *buffer, const void *src, + int32_t num_values, opal_data_type_t type); +typedef int (*opal_dss_unpack_buffer_fn_t)(opal_buffer_t *buffer, void *dest, + int32_t *num_values, + opal_data_type_t type); + /** * Base structure for the DSS @@ -604,36 +611,36 @@ typedef void (*orte_dss_dump_data_types_fn_t)(int output); * Base module structure for the DSS - presents the required function * pointers to the calling interface.
*/ -struct orte_dss_t { - orte_dss_set_fn_t set; - orte_dss_get_fn_t get; - orte_dss_arith_fn_t arith; - orte_dss_increment_fn_t increment; - orte_dss_decrement_fn_t decrement; - orte_dss_set_buffer_type_fn_t set_buffer_type; - orte_dss_pack_fn_t pack; - orte_dss_unpack_fn_t unpack; - orte_dss_copy_fn_t copy; - orte_dss_compare_fn_t compare; - orte_dss_size_fn_t size; - orte_dss_print_fn_t print; - orte_dss_release_fn_t release; - orte_dss_peek_next_item_fn_t peek; - orte_dss_unload_fn_t unload; - orte_dss_load_fn_t load; - orte_dss_xfer_payload_fn_t xfer_payload; - orte_dss_copy_payload_fn_t copy_payload; - orte_dss_register_fn_t register_type; - orte_dss_lookup_data_type_fn_t lookup_data_type; - orte_dss_dump_data_types_fn_t dump_data_types; - orte_dss_dump_fn_t dump; +struct opal_dss_t { + opal_dss_set_fn_t set; + opal_dss_get_fn_t get; + opal_dss_arith_fn_t arith; + opal_dss_increment_fn_t increment; + opal_dss_decrement_fn_t decrement; + opal_dss_set_buffer_type_fn_t set_buffer_type; + opal_dss_pack_fn_t pack; + opal_dss_unpack_fn_t unpack; + opal_dss_copy_fn_t copy; + opal_dss_compare_fn_t compare; + opal_dss_size_fn_t size; + opal_dss_print_fn_t print; + opal_dss_release_fn_t release; + opal_dss_peek_next_item_fn_t peek; + opal_dss_unload_fn_t unload; + opal_dss_load_fn_t load; + opal_dss_xfer_payload_fn_t xfer_payload; + opal_dss_copy_payload_fn_t copy_payload; + opal_dss_register_fn_t register_type; + opal_dss_lookup_data_type_fn_t lookup_data_type; + opal_dss_dump_data_types_fn_t dump_data_types; + opal_dss_dump_fn_t dump; + opal_dss_pack_buffer_fn_t pack_buffer; + opal_dss_unpack_buffer_fn_t unpack_buffer; }; -typedef struct orte_dss_t orte_dss_t; +typedef struct opal_dss_t opal_dss_t; -ORTE_DECLSPEC extern orte_dss_t orte_dss; /* holds dss function pointers */ +OPAL_DECLSPEC extern opal_dss_t opal_dss; /* holds dss function pointers */ -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS -#endif /* ORTE_DSS_H */ +#endif /* OPAL_DSS_H 
*/ diff --git a/opal/dss/dss_arith.c b/opal/dss/dss_arith.c new file mode 100644 index 0000000000..856cdb5506 --- /dev/null +++ b/opal/dss/dss_arith.c @@ -0,0 +1,666 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/dss/dss_internal.h" + +static void opal_dss_arith_int(int *value, int *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_uint(uint *value, uint *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_size(size_t *value, size_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_pid(pid_t *value, pid_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_byte(uint8_t *value, uint8_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_int8(int8_t *value, int8_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_int16(int16_t *value, int16_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_uint16(uint16_t *value, uint16_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_int32(int32_t *value, int32_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_uint32(uint32_t *value, uint32_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_int64(int64_t *value, int64_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_uint64(uint64_t *value, uint64_t *operand, opal_dss_arith_op_t 
operation); + +/* some weird ones - but somebody *might* want to do it, I suppose... */ +static void opal_dss_arith_data_type(opal_data_type_t *value, opal_data_type_t *operand, opal_dss_arith_op_t operation); + +int opal_dss_arith(opal_dss_value_t *value, opal_dss_value_t *operand, opal_dss_arith_op_t operation) +{ + /* check for error */ + if (NULL == value || NULL == operand) { + return OPAL_ERR_BAD_PARAM; + } + if (operand->type != value->type) { + return OPAL_ERR_TYPE_MISMATCH; + } + + /* Lookup the arith function for this type and call it */ + + switch(operand->type) { + case OPAL_INT: + opal_dss_arith_int((int*)value->data, (int*)operand->data, operation); + break; + + case OPAL_UINT: + opal_dss_arith_uint((uint*)value->data, (uint*)operand->data, operation); + break; + + case OPAL_SIZE: + opal_dss_arith_size((size_t*)value->data, (size_t*)operand->data, operation); + break; + + case OPAL_PID: + opal_dss_arith_pid((pid_t*)value->data, (pid_t*)operand->data, operation); + break; + + case OPAL_BYTE: + case OPAL_UINT8: + opal_dss_arith_byte((uint8_t*)value->data, (uint8_t*)operand->data, operation); + break; + + case OPAL_INT8: + opal_dss_arith_int8((int8_t*)value->data, (int8_t*)operand->data, operation); + break; + + case OPAL_INT16: + opal_dss_arith_int16((int16_t*)value->data, (int16_t*)operand->data, operation); + break; + + case OPAL_UINT16: + opal_dss_arith_uint16((uint16_t*)value->data, (uint16_t*)operand->data, operation); + break; + + case OPAL_INT32: + opal_dss_arith_int32((int32_t*)value->data, (int32_t*)operand->data, operation); + break; + + case OPAL_UINT32: + opal_dss_arith_uint32((uint32_t*)value->data, (uint32_t*)operand->data, operation); + break; + + case OPAL_INT64: + opal_dss_arith_int64((int64_t*)value->data, (int64_t*)operand->data, operation); + break; + + case OPAL_UINT64: + opal_dss_arith_uint64((uint64_t*)value->data, (uint64_t*)operand->data, operation); + break; + + default: + return OPAL_ERR_OPERATION_UNSUPPORTED; + } + + return 
OPAL_SUCCESS; +} + +int opal_dss_increment(opal_dss_value_t *value) +{ + int one; + unsigned int uone; + size_t sone; + pid_t pone; + uint8_t u8one; + int8_t i8one; + uint16_t u16one; + int16_t i16one; + uint32_t u32one; + int32_t i32one; + uint64_t u64one; + int64_t i64one; + opal_data_type_t datatypeone; + + /* check for error */ + if (NULL == value) { + return OPAL_ERR_BAD_PARAM; + } + /* Lookup the arith function for this type and call it */ + + switch(value->type) { + case OPAL_INT: + one = 1; + opal_dss_arith_int((int*)value->data, &one, OPAL_DSS_ADD); + break; + + case OPAL_UINT: + uone = 1; + opal_dss_arith_uint((uint*)value->data, &uone, OPAL_DSS_ADD); + break; + + case OPAL_SIZE: + sone = 1; + opal_dss_arith_size((size_t*)value->data, &sone, OPAL_DSS_ADD); + break; + + case OPAL_PID: + pone = 1; + opal_dss_arith_pid((pid_t*)value->data, &pone, OPAL_DSS_ADD); + break; + + case OPAL_BYTE: + case OPAL_UINT8: + u8one = 1; + opal_dss_arith_byte((uint8_t*)value->data, &u8one, OPAL_DSS_ADD); + break; + + case OPAL_INT8: + i8one = 1; + opal_dss_arith_int8((int8_t*)value->data, &i8one, OPAL_DSS_ADD); + break; + + case OPAL_INT16: + i16one = 1; + opal_dss_arith_int16((int16_t*)value->data, &i16one, OPAL_DSS_ADD); + break; + + case OPAL_UINT16: + u16one = 1; + opal_dss_arith_uint16((uint16_t*)value->data, &u16one, OPAL_DSS_ADD); + break; + + case OPAL_INT32: + i32one = 1; + opal_dss_arith_int32((int32_t*)value->data, &i32one, OPAL_DSS_ADD); + break; + + case OPAL_UINT32: + u32one = 1; + opal_dss_arith_uint32((uint32_t*)value->data, &u32one, OPAL_DSS_ADD); + break; + + case OPAL_INT64: + i64one = 1; + opal_dss_arith_int64((int64_t*)value->data, &i64one, OPAL_DSS_ADD); + break; + + case OPAL_UINT64: + u64one = 1; + opal_dss_arith_uint64((uint64_t*)value->data, &u64one, OPAL_DSS_ADD); + break; + + case OPAL_DATA_TYPE: + datatypeone = 1; + opal_dss_arith_data_type((opal_data_type_t*)value->data, &datatypeone, OPAL_DSS_ADD); + break; + + default: + return 
OPAL_ERR_OPERATION_UNSUPPORTED; + } + + return OPAL_SUCCESS; +} + +int opal_dss_decrement(opal_dss_value_t *value) +{ + int one; + unsigned int uone; + size_t sone; + pid_t pone; + uint8_t u8one; + int8_t i8one; + uint16_t u16one; + int16_t i16one; + uint32_t u32one; + int32_t i32one; + uint64_t u64one; + int64_t i64one; + opal_data_type_t datatypeone; + + /* check for error */ + if (NULL == value) { + return OPAL_ERR_BAD_PARAM; + } + /* Lookup the arith function for this type and call it */ + + switch(value->type) { + case OPAL_INT: + one = 1; + opal_dss_arith_int((int*)value->data, &one, OPAL_DSS_SUB); + break; + + case OPAL_UINT: + uone = 1; + opal_dss_arith_uint((uint*)value->data, &uone, OPAL_DSS_SUB); + break; + + case OPAL_SIZE: + sone = 1; + opal_dss_arith_size((size_t*)value->data, &sone, OPAL_DSS_SUB); + break; + + case OPAL_PID: + pone = 1; + opal_dss_arith_pid((pid_t*)value->data, &pone, OPAL_DSS_SUB); + break; + + case OPAL_BYTE: + case OPAL_UINT8: + u8one = 1; + opal_dss_arith_byte((uint8_t*)value->data, &u8one, OPAL_DSS_SUB); + break; + + case OPAL_INT8: + i8one = 1; + opal_dss_arith_int8((int8_t*)value->data, &i8one, OPAL_DSS_SUB); + break; + + case OPAL_INT16: + i16one = 1; + opal_dss_arith_int16((int16_t*)value->data, &i16one, OPAL_DSS_SUB); + break; + + case OPAL_UINT16: + u16one = 1; + opal_dss_arith_uint16((uint16_t*)value->data, &u16one, OPAL_DSS_SUB); + break; + + case OPAL_INT32: + i32one = 1; + opal_dss_arith_int32((int32_t*)value->data, &i32one, OPAL_DSS_SUB); + break; + + case OPAL_UINT32: + u32one = 1; + opal_dss_arith_uint32((uint32_t*)value->data, &u32one, OPAL_DSS_SUB); + break; + + case OPAL_INT64: + i64one = 1; + opal_dss_arith_int64((int64_t*)value->data, &i64one, OPAL_DSS_SUB); + break; + + case OPAL_UINT64: + u64one = 1; + opal_dss_arith_uint64((uint64_t*)value->data, &u64one, OPAL_DSS_SUB); + break; + + case OPAL_DATA_TYPE: + datatypeone = 1; + opal_dss_arith_data_type((opal_data_type_t*)value->data, &datatypeone, OPAL_DSS_SUB); 
+ break; + + default: + return OPAL_ERR_OPERATION_UNSUPPORTED; + } + + return OPAL_SUCCESS; +} + +/* + * NUMERIC arith FUNCTIONS + */ +static void opal_dss_arith_int(int *value, int *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_uint(uint *value, uint *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_size(size_t *value, size_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_pid(pid_t *value, pid_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_byte(uint8_t *value, uint8_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case 
OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_int8(int8_t *value, int8_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_int16(int16_t *value, int16_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_uint16(uint16_t *value, uint16_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_int32(int32_t *value, int32_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_uint32(uint32_t *value, uint32_t 
*operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_int64(int64_t *value, int64_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_uint64(uint64_t *value, uint64_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_data_type(opal_data_type_t *value, opal_data_type_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + diff --git a/opal/dss/dss_compare.c b/opal/dss/dss_compare.c new file mode 100644 index 0000000000..775927778b --- /dev/null +++ b/opal/dss/dss_compare.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include + +#include "opal/dss/dss_internal.h" + +int opal_dss_compare(void *value1, void *value2, opal_data_type_t type) +{ + opal_dss_type_info_t *info; + + /* check for error */ + if (NULL == value1 || NULL == value2) { + return OPAL_ERR_BAD_PARAM; + } + + /* Lookup the compare function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNKNOWN_DATA_TYPE; + } + + return info->odti_compare_fn(value1, value2, type); +} + +/* + * NUMERIC COMPARE FUNCTIONS + */ +int opal_dss_compare_int(int *value1, int *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint(unsigned int *value1, unsigned int *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_size(size_t *value1, size_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_pid(pid_t *value1, pid_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_byte(char *value1, char *value2, opal_data_type_t type) +{ + 
if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_char(char *value1, char *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_int8(int8_t *value1, int8_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint8(uint8_t *value1, uint8_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_int16(int16_t *value1, int16_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint16(uint16_t *value1, uint16_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_int32(int32_t *value1, int32_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint32(uint32_t *value1, uint32_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_int64(int64_t *value1, int64_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint64(uint64_t *value1, uint64_t *value2, 
opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +/* + * NON-NUMERIC SYSTEM TYPES + */ + +/* NULL */ +int opal_dss_compare_null(char *value1, char *value2, opal_data_type_t type) +{ + return OPAL_EQUAL; +} + +/* BOOL */ +int opal_dss_compare_bool(bool *value1, bool *value2, opal_data_type_t type) +{ + if (*value1 && !(*value2)) return OPAL_VALUE1_GREATER; + + if (*value2 && !(*value1)) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; + +} + +/* STRING - same sign convention as strcmp(): value1 is "greater" when it sorts after value2 */ +int opal_dss_compare_string(char *value1, char *value2, opal_data_type_t type) +{ + if (0 < strcmp(value1, value2)) return OPAL_VALUE1_GREATER; + + if (0 > strcmp(value1, value2)) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +/* COMPARE FUNCTIONS FOR GENERIC OPAL TYPES */ +/* OPAL_DATA_TYPE */ +int opal_dss_compare_dt(opal_data_type_t *value1, opal_data_type_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +/* OPAL_DATA_VALUE */ +int opal_dss_compare_data_value(opal_dss_value_t *value1, opal_dss_value_t *value2, opal_data_type_t type) +{ + /* can't compare if the two types don't match */ + if (value1->type != value2->type) { + return OPAL_ERR_TYPE_MISMATCH; + } + + /* okay, go ahead and compare the values themselves */ + return opal_dss.compare(value1->data, value2->data, value1->type); +} + +/* OPAL_BYTE_OBJECT */ +int opal_dss_compare_byte_object(opal_byte_object_t *value1, opal_byte_object_t *value2, opal_data_type_t type) +{ + int checksum, diff; + int32_t i; + + /* compare the sizes first - bigger size object is "greater than" */ + if (value1->size > value2->size) return OPAL_VALUE1_GREATER; + + if (value2->size > value1->size) return OPAL_VALUE2_GREATER; + + /* get here if the two sizes are identical - now do a simple checksum-style + * calculation to determine
"biggest" + */ + checksum = 0; + + for (i=0; i < value1->size; i++) { + /* protect against overflows */ + diff = value1->bytes[i] - value2->bytes[i]; + if (INT_MAX-abs(checksum)-abs(diff) < 0) { /* got an overflow condition */ + checksum = 0; + } + checksum += diff; + } + + if (0 > checksum) return OPAL_VALUE2_GREATER; /* sum of value2 bytes was greater */ + + if (0 < checksum) return OPAL_VALUE1_GREATER; /* of value1 bytes was greater */ + + return OPAL_EQUAL; /* sum of both value's bytes was identical */ +} diff --git a/opal/dss/dss_copy.c b/opal/dss/dss_copy.c new file mode 100644 index 0000000000..09dbc071ac --- /dev/null +++ b/opal/dss/dss_copy.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/dss/dss_internal.h" + +int opal_dss_copy(void **dest, void *src, opal_data_type_t type) +{ + opal_dss_type_info_t *info; + + /* check for error */ + if (NULL == dest) { + return OPAL_ERR_BAD_PARAM; + } + if (NULL == src && (OPAL_NULL != type && OPAL_STRING != type)) { + return OPAL_ERR_BAD_PARAM; + } + + /* Lookup the copy function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNKNOWN_DATA_TYPE; + } + + return info->odti_copy_fn(dest, src, type); +} + +/* + * STANDARD COPY FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED + */ +int opal_dss_std_copy(void **dest, void *src, opal_data_type_t type) +{ + size_t datasize; + uint8_t *val = NULL; + + switch(type) { + case OPAL_BOOL: + datasize = sizeof(bool); + break; + + case OPAL_INT: + case OPAL_UINT: + datasize = sizeof(int); + break; + + case OPAL_SIZE: + datasize = sizeof(size_t); + break; + + case OPAL_PID: + datasize = sizeof(pid_t); + break; + + case OPAL_BYTE: + case OPAL_INT8: + case OPAL_UINT8: + datasize = 1; + break; + + case OPAL_INT16: + case OPAL_UINT16: + datasize = 2; + break; + + case OPAL_INT32: + case OPAL_UINT32: + datasize = 4; + break; + + case OPAL_INT64: + case OPAL_UINT64: + datasize = 8; + break; + + case OPAL_DATA_TYPE: + datasize = sizeof(opal_data_type_t); + break; + + default: + return OPAL_ERR_UNKNOWN_DATA_TYPE; + } + + val = (uint8_t*)malloc(datasize); + if (NULL == val) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + memcpy(val, src, datasize); + *dest = val; + + return OPAL_SUCCESS; +} + +/* COPY FUNCTIONS FOR NON-STANDARD SYSTEM TYPES */ + +/* + * NULL + */ +int opal_dss_copy_null(char **dest, char *src, opal_data_type_t type) +{ + char *val; + + *dest = (char*)malloc(sizeof(char*)); + if (NULL == *dest) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + val = *dest; /* 
save the address of the value */ + + /* set the dest to null */ + *val = 0x00; + + return OPAL_SUCCESS; +} + +/* + * STRING + */ +int opal_dss_copy_string(char **dest, char *src, opal_data_type_t type) +{ + if (NULL == src) { /* got zero-length string/NULL pointer - store NULL */ + *dest = NULL; + } else { + *dest = strdup(src); + } + + return OPAL_SUCCESS; +} + +/* COPY FUNCTIONS FOR GENERIC OPAL TYPES */ + +/* + * OPAL_DATA_VALUE + */ +int opal_dss_copy_data_value(opal_dss_value_t **dest, opal_dss_value_t *src, + opal_data_type_t type) +{ + int rc; + + /* create the new object */ + *dest = OBJ_NEW(opal_dss_value_t); + if (NULL == *dest) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + (*dest)->type = src->type; + + /* copy the payload with its associated copy function */ + if (OPAL_SUCCESS != (rc = opal_dss.copy(&((*dest)->data), src->data, src->type))) { + OBJ_RELEASE(*dest); + return rc; + } + + return OPAL_SUCCESS; +} + + +/* + * OPAL_BYTE_OBJECT + */ +int opal_dss_copy_byte_object(opal_byte_object_t **dest, opal_byte_object_t *src, + opal_data_type_t type) +{ + /* allocate space for the new object */ + *dest = (opal_byte_object_t*)malloc(sizeof(opal_byte_object_t)); + if (NULL == *dest) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + (*dest)->size = src->size; + + /* allocate the required space for the bytes */ + (*dest)->bytes = (uint8_t*)malloc(src->size); + if (NULL == (*dest)->bytes) { + free(*dest); *dest = NULL; /* container came from malloc() above, not OBJ_NEW - must not go through OBJ_RELEASE */ + return OPAL_ERR_OUT_OF_RESOURCE; + } + + /* copy the data across */ + memcpy((*dest)->bytes, src->bytes, src->size); + + return OPAL_SUCCESS; +} diff --git a/orte/dss/dss_dump.c b/opal/dss/dss_dump.c similarity index 57% rename from orte/dss/dss_dump.c rename to opal/dss/dss_dump.c index b793ef609b..6d356420ee 100644 --- a/orte/dss/dss_dump.c +++ b/opal/dss/dss_dump.c @@ -14,52 +14,46 @@ * $HEADER$ */ -#include "orte_config.h" -#include "orte/orte_types.h" +#include "opal_config.h" #include "opal/util/output.h" -#include "orte/mca/errmgr/errmgr.h" +#include
"opal/dss/dss_internal.h" -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - - -int orte_dss_dump(int output_stream, void *src, orte_data_type_t type) +int opal_dss_dump(int output_stream, void *src, opal_data_type_t type) { char *sptr; int rc; - if (ORTE_SUCCESS != (rc = orte_dss.print(&sptr, NULL, src, type))) { - ORTE_ERROR_LOG(rc); + if (OPAL_SUCCESS != (rc = opal_dss.print(&sptr, NULL, src, type))) { return rc; } opal_output(output_stream, "%s", sptr); free(sptr); - return ORTE_SUCCESS; + return OPAL_SUCCESS; } -void orte_dss_dump_data_types(int output) +void opal_dss_dump_data_types(int output) { - orte_dss_type_info_t **ptr; - orte_data_type_t j; - orte_std_cntr_t i; + opal_dss_type_info_t *ptr; + opal_data_type_t j; + int32_t i; opal_output(output, "DUMP OF REGISTERED DATA TYPES"); - ptr = (orte_dss_type_info_t**)(orte_dss_types->addr); - for (i=0, j=0; j < orte_dss_num_reg_types && - i < orte_dss_types->size; i++) { - if (NULL != ptr[i]) { + j = 0; + for (i=0; i < opal_pointer_array_get_size(&opal_dss_types); i++) { + ptr = opal_pointer_array_get_item(&opal_dss_types, i); + if (NULL != ptr) { j++; /* print out the info */ opal_output(output, "\tIndex: %lu\tData type: %lu\tName: %s", (unsigned long)j, - (unsigned long)ptr[i]->odti_type, - ptr[i]->odti_name); + (unsigned long)ptr->odti_type, + ptr->odti_name); } } } diff --git a/orte/dss/dss_get.c b/opal/dss/dss_get.c similarity index 71% rename from orte/dss/dss_get.c rename to opal/dss/dss_get.c index 21698d1079..1a1a007edb 100644 --- a/orte/dss/dss_get.c +++ b/opal/dss/dss_get.c @@ -14,24 +14,16 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" +#include "opal/dss/dss_internal.h" #include "opal/util/output.h" -int orte_dss_get(void **data, orte_data_value_t *value, orte_data_type_t type) +int opal_dss_get(void **data, opal_dss_value_t *value, 
opal_data_type_t type) { /* check for error */ if (NULL == value || NULL == data) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* okay, we assume that the user has provided memory for the destination. @@ -41,13 +33,12 @@ int orte_dss_get(void **data, orte_data_value_t *value, orte_data_type_t type) * type of data being requested */ if (type != value->type) { - ORTE_ERROR_LOG(ORTE_ERR_TYPE_MISMATCH); - return ORTE_ERR_TYPE_MISMATCH; + return OPAL_ERR_TYPE_MISMATCH; } /* point the destination at the value */ *data = value->data; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/opal/dss/dss_internal.h b/opal/dss/dss_internal.h new file mode 100644 index 0000000000..f856a44ec9 --- /dev/null +++ b/opal/dss/dss_internal.h @@ -0,0 +1,477 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef OPAL_DSS_INTERNAL_H_ +#define OPAL_DSS_INTERNAL_H_ + +#include "opal_config.h" +#include "opal/constants.h" + +#include "opal/class/opal_pointer_array.h" + +#include "opal/dss/dss.h" + +#if HAVE_STRING_H +# if !defined(STDC_HEADERS) && HAVE_MEMORY_H +# include <memory.h> +# endif +# include <string.h> +#endif + +BEGIN_C_DECLS + +/* + * The default starting chunk size + */ +#define OPAL_DSS_DEFAULT_INITIAL_SIZE 128 +/* + * The default threshold size when we switch from doubling the + * buffer size to additively increasing it + */ +#define OPAL_DSS_DEFAULT_THRESHOLD_SIZE 1024 + +/* + * Internal type corresponding to size_t. Do not use this in + * interface calls - use OPAL_SIZE instead. + */ +#if SIZEOF_SIZE_T == 1 +#define DSS_TYPE_SIZE_T OPAL_UINT8 +#elif SIZEOF_SIZE_T == 2 +#define DSS_TYPE_SIZE_T OPAL_UINT16 +#elif SIZEOF_SIZE_T == 4 +#define DSS_TYPE_SIZE_T OPAL_UINT32 +#elif SIZEOF_SIZE_T == 8 +#define DSS_TYPE_SIZE_T OPAL_UINT64 +#else +#error Unsupported size_t size! +#endif + +/* + * Internal type corresponding to bool. Do not use this in interface + * calls - use OPAL_BOOL instead. + */ +#if SIZEOF_BOOL == 1 +#define DSS_TYPE_BOOL OPAL_UINT8 +#elif SIZEOF_BOOL == 2 +#define DSS_TYPE_BOOL OPAL_UINT16 +#elif SIZEOF_BOOL == 4 +#define DSS_TYPE_BOOL OPAL_UINT32 +#elif SIZEOF_BOOL == 8 +#define DSS_TYPE_BOOL OPAL_UINT64 +#else +#error Unsupported bool size! +#endif + +/* + * Internal type corresponding to int and unsigned int. Do not use + * this in interface calls - use OPAL_INT / OPAL_UINT instead.
+ */ +#if SIZEOF_INT == 1 +#define DSS_TYPE_INT OPAL_INT8 +#define DSS_TYPE_UINT OPAL_UINT8 +#elif SIZEOF_INT == 2 +#define DSS_TYPE_INT OPAL_INT16 +#define DSS_TYPE_UINT OPAL_UINT16 +#elif SIZEOF_INT == 4 +#define DSS_TYPE_INT OPAL_INT32 +#define DSS_TYPE_UINT OPAL_UINT32 +#elif SIZEOF_INT == 8 +#define DSS_TYPE_INT OPAL_INT64 +#define DSS_TYPE_UINT OPAL_UINT64 +#else +#error Unsupported int size! +#endif + +/* + * Internal type corresponding to pid_t. Do not use this in interface + * calls - use OPAL_PID instead. + */ +#if SIZEOF_PID_T == 1 +#define DSS_TYPE_PID_T OPAL_UINT8 +#elif SIZEOF_PID_T == 2 +#define DSS_TYPE_PID_T OPAL_UINT16 +#elif SIZEOF_PID_T == 4 +#define DSS_TYPE_PID_T OPAL_UINT32 +#elif SIZEOF_PID_T == 8 +#define DSS_TYPE_PID_T OPAL_UINT64 +#else +#error Unsupported pid_t size! +#endif + +/* Unpack generic size macros */ +#define UNPACK_SIZE_MISMATCH(unpack_type, remote_type, ret) \ +do { \ + switch(remote_type) { \ + case OPAL_UINT8: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint8_t, remote_type); \ + break; \ + case OPAL_INT8: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int8_t, remote_type); \ + break; \ + case OPAL_UINT16: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint16_t, remote_type); \ + break; \ + case OPAL_INT16: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int16_t, remote_type); \ + break; \ + case OPAL_UINT32: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint32_t, remote_type); \ + break; \ + case OPAL_INT32: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int32_t, remote_type); \ + break; \ + case OPAL_UINT64: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint64_t, remote_type); \ + break; \ + case OPAL_INT64: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int64_t, remote_type); \ + break; \ + default: \ + ret = OPAL_ERR_NOT_FOUND; \ + } \ +} while (0) + +/* NOTE: do not need to deal with endianness here, as the unpacking of +the underlying sender-side type will do that for us. Repeat: the +data in tmpbuf[] is already in host byte order. 
*/ +#define UNPACK_SIZE_MISMATCH_FOUND(unpack_type, tmptype, tmpdsstype) \ +do { \ + int32_t i; \ + tmptype *tmpbuf = (tmptype*)malloc(sizeof(tmptype) * (*num_vals)); \ + ret = opal_dss_unpack_buffer(buffer, tmpbuf, num_vals, tmpdsstype); \ + for (i = 0 ; i < *num_vals ; ++i) { \ + ((unpack_type*) dest)[i] = (unpack_type)(tmpbuf[i]); \ + } \ + free(tmpbuf); \ +} while (0) + + +/** + * Internal struct used for holding registered dss functions + */ +struct opal_dss_type_info_t { + opal_object_t super; + /* type identifier */ + opal_data_type_t odti_type; + /** Debugging string name */ + char *odti_name; + /** Pack function */ + opal_dss_pack_fn_t odti_pack_fn; + /** Unpack function */ + opal_dss_unpack_fn_t odti_unpack_fn; + /** copy function */ + opal_dss_copy_fn_t odti_copy_fn; + /** compare function */ + opal_dss_compare_fn_t odti_compare_fn; + /** size function */ + opal_dss_size_fn_t odti_size_fn; + /** print function */ + opal_dss_print_fn_t odti_print_fn; + /** Release function */ + opal_dss_release_fn_t odti_release_fn; + /** flag to indicate structured data */ + bool odti_structured; +}; +/** + * Convenience typedef + */ +typedef struct opal_dss_type_info_t opal_dss_type_info_t; +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_dss_type_info_t); + +/* + * globals needed within dss + */ +extern bool opal_dss_initialized; +extern bool opal_dss_debug; +extern int opal_dss_verbose; +extern int opal_dss_initial_size; +extern int opal_dss_threshold_size; +extern opal_pointer_array_t opal_dss_types; +extern opal_data_type_t opal_dss_num_reg_types; + + /* + * Implementations of API functions + */ + + int opal_dss_set(opal_dss_value_t *value, void *new_value, opal_data_type_t type); + + int opal_dss_get(void **data, opal_dss_value_t *value, opal_data_type_t type); + + int opal_dss_arith(opal_dss_value_t *value, opal_dss_value_t *operand, opal_dss_arith_op_t operation); + + int opal_dss_increment(opal_dss_value_t *value); + + int opal_dss_decrement(opal_dss_value_t *value); + 
+ int opal_dss_set_buffer_type(opal_buffer_t *buffer, opal_dss_buffer_type_t type); + + int opal_dss_pack(opal_buffer_t *buffer, const void *src, + int32_t num_vals, + opal_data_type_t type); + int opal_dss_unpack(opal_buffer_t *buffer, void *dest, + int32_t *max_num_vals, + opal_data_type_t type); + + int opal_dss_copy(void **dest, void *src, opal_data_type_t type); + + int opal_dss_compare(void *value1, void *value2, + opal_data_type_t type); + + int opal_dss_print(char **output, char *prefix, void *src, opal_data_type_t type); + + int opal_dss_dump(int output_stream, void *src, opal_data_type_t type); + + int opal_dss_size(size_t *size, void *src, opal_data_type_t type); + + int opal_dss_peek(opal_buffer_t *buffer, opal_data_type_t *type, + int32_t *number); + + int opal_dss_peek_type(opal_buffer_t *buffer, opal_data_type_t *type); + + int opal_dss_unload(opal_buffer_t *buffer, void **payload, + int32_t *bytes_used); + int opal_dss_load(opal_buffer_t *buffer, void *payload, int32_t bytes_used); + + int opal_dss_xfer_payload(opal_buffer_t *dest, opal_buffer_t *src); + + int opal_dss_copy_payload(opal_buffer_t *dest, opal_buffer_t *src); + + int opal_dss_register(opal_dss_pack_fn_t pack_fn, + opal_dss_unpack_fn_t unpack_fn, + opal_dss_copy_fn_t copy_fn, + opal_dss_compare_fn_t compare_fn, + opal_dss_size_fn_t size_fn, + opal_dss_print_fn_t print_fn, + opal_dss_release_fn_t release_fn, + bool structured, + const char *name, opal_data_type_t *type); + + void opal_dss_release(opal_dss_value_t *value); + + char *opal_dss_lookup_data_type(opal_data_type_t type); + + void opal_dss_dump_data_types(int output); + + /* + * Specialized API functions + */ + int opal_dss_pack_buffer(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_unpack_buffer(opal_buffer_t *buffer, void *dst, + int32_t *num_vals, opal_data_type_t type); + + /* + * Internal pack functions + */ + + int opal_dss_pack_null(opal_buffer_t *buffer, const void 
*src, + int32_t num_vals, opal_data_type_t type); + int opal_dss_pack_byte(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_bool(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_int(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + int opal_dss_pack_int16(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + int opal_dss_pack_int32(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + int opal_dss_pack_int64(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_sizet(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_pid(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_string(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_data_type(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_data_value(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_byte_object(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + /* + * Internal unpack functions + */ + + int opal_dss_unpack_null(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + int opal_dss_unpack_byte(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_bool(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_int(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + int opal_dss_unpack_int16(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + int 
opal_dss_unpack_int32(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + int opal_dss_unpack_int64(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_sizet(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_pid(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_string(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_data_type(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_data_value(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_byte_object(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + /* + * Internal copy functions + */ + + int opal_dss_std_copy(void **dest, void *src, opal_data_type_t type); + + int opal_dss_copy_null(char **dest, char *src, opal_data_type_t type); + + int opal_dss_copy_string(char **dest, char *src, opal_data_type_t type); + + int opal_dss_copy_byte_object(opal_byte_object_t **dest, opal_byte_object_t *src, + opal_data_type_t type); + + int opal_dss_copy_data_value(opal_dss_value_t **dest, opal_dss_value_t *src, + opal_data_type_t type); + /* + * Internal compare functions + */ + + int opal_dss_compare_bool(bool *value1, bool *value2, opal_data_type_t type); + + int opal_dss_compare_int(int *value1, int *value2, opal_data_type_t type); + int opal_dss_compare_uint(unsigned int *value1, unsigned int *value2, opal_data_type_t type); + + int opal_dss_compare_size(size_t *value1, size_t *value2, opal_data_type_t type); + + int opal_dss_compare_pid(pid_t *value1, pid_t *value2, opal_data_type_t type); + + int opal_dss_compare_byte(char *value1, char *value2, opal_data_type_t type); + int opal_dss_compare_char(char *value1, char *value2, opal_data_type_t type); + 
int opal_dss_compare_int8(int8_t *value1, int8_t *value2, opal_data_type_t type); + int opal_dss_compare_uint8(uint8_t *value1, uint8_t *value2, opal_data_type_t type); + + int opal_dss_compare_int16(int16_t *value1, int16_t *value2, opal_data_type_t type); + int opal_dss_compare_uint16(uint16_t *value1, uint16_t *value2, opal_data_type_t type); + + int opal_dss_compare_int32(int32_t *value1, int32_t *value2, opal_data_type_t type); + int opal_dss_compare_uint32(uint32_t *value1, uint32_t *value2, opal_data_type_t type); + + int opal_dss_compare_int64(int64_t *value1, int64_t *value2, opal_data_type_t type); + int opal_dss_compare_uint64(uint64_t *value1, uint64_t *value2, opal_data_type_t type); + + int opal_dss_compare_null(char *value1, char *value2, opal_data_type_t type); + + int opal_dss_compare_string(char *value1, char *value2, opal_data_type_t type); + + int opal_dss_compare_dt(opal_data_type_t *value1, opal_data_type_t *value2, opal_data_type_t type); + + int opal_dss_compare_data_value(opal_dss_value_t *value1, opal_dss_value_t *value2, opal_data_type_t type); + + int opal_dss_compare_byte_object(opal_byte_object_t *value1, opal_byte_object_t *value2, opal_data_type_t type); + + /* + * Internal size functions + */ + int opal_dss_std_size(size_t *size, void *src, opal_data_type_t type); + + int opal_dss_size_string(size_t *size, char *src, opal_data_type_t type); + + int opal_dss_size_data_value(size_t *size, opal_dss_value_t *src, opal_data_type_t type); + + int opal_dss_size_byte_object(size_t *size, opal_byte_object_t *src, opal_data_type_t type); + + /* + * Internal print functions + */ + int opal_dss_print_byte(char **output, char *prefix, uint8_t *src, opal_data_type_t type); + + int opal_dss_print_string(char **output, char *prefix, char *src, opal_data_type_t type); + + int opal_dss_print_size(char **output, char *prefix, size_t *src, opal_data_type_t type); + int opal_dss_print_pid(char **output, char *prefix, pid_t *src, opal_data_type_t type); 
+ int opal_dss_print_bool(char **output, char *prefix, bool *src, opal_data_type_t type); + int opal_dss_print_int(char **output, char *prefix, int *src, opal_data_type_t type); + int opal_dss_print_uint(char **output, char *prefix, int *src, opal_data_type_t type); + int opal_dss_print_uint8(char **output, char *prefix, uint8_t *src, opal_data_type_t type); + int opal_dss_print_uint16(char **output, char *prefix, uint16_t *src, opal_data_type_t type); + int opal_dss_print_uint32(char **output, char *prefix, uint32_t *src, opal_data_type_t type); + int opal_dss_print_int8(char **output, char *prefix, int8_t *src, opal_data_type_t type); + int opal_dss_print_int16(char **output, char *prefix, int16_t *src, opal_data_type_t type); + int opal_dss_print_int32(char **output, char *prefix, int32_t *src, opal_data_type_t type); +#ifdef HAVE_INT64_T + int opal_dss_print_uint64(char **output, char *prefix, uint64_t *src, opal_data_type_t type); + int opal_dss_print_int64(char **output, char *prefix, int64_t *src, opal_data_type_t type); +#else + int opal_dss_print_uint64(char **output, char *prefix, void *src, opal_data_type_t type); + int opal_dss_print_int64(char **output, char *prefix, void *src, opal_data_type_t type); +#endif + int opal_dss_print_null(char **output, char *prefix, void *src, opal_data_type_t type); + int opal_dss_print_data_type(char **output, char *prefix, opal_data_type_t *src, opal_data_type_t type); + int opal_dss_print_data_value(char **output, char *prefix, opal_dss_value_t *src, opal_data_type_t type); + int opal_dss_print_byte_object(char **output, char *prefix, opal_byte_object_t *src, opal_data_type_t type); + + + /* + * Internal release functions + */ + void opal_dss_std_release(opal_dss_value_t *value); + + void opal_dss_std_obj_release(opal_dss_value_t *value); + + void opal_dss_release_byte_object(opal_dss_value_t *value); + + /* + * Internal helper functions + */ + + char* opal_dss_buffer_extend(opal_buffer_t *bptr, size_t bytes_to_add); 
+ + bool opal_dss_too_small(opal_buffer_t *buffer, size_t bytes_reqd); + + opal_dss_type_info_t* opal_dss_find_type(opal_data_type_t type); + + int opal_dss_store_data_type(opal_buffer_t *buffer, opal_data_type_t type); + + int opal_dss_get_data_type(opal_buffer_t *buffer, opal_data_type_t *type); + +END_C_DECLS + +#endif diff --git a/orte/dss/dss_internal_functions.c b/opal/dss/dss_internal_functions.c similarity index 62% rename from orte/dss/dss_internal_functions.c rename to opal/dss/dss_internal_functions.c index 6c1a1861b7..d8e31a2850 100644 --- a/orte/dss/dss_internal_functions.c +++ b/opal/dss/dss_internal_functions.c @@ -16,28 +16,22 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" #include #ifdef HAVE_UNISTD_H #include #endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include "opal/util/output.h" +#include "opal/class/opal_pointer_array.h" -#include "orte/class/orte_pointer_array.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" +#include "opal/dss/dss_internal.h" /** * Internal function that resizes (expands) an inuse buffer if * necessary. 
*/ -char* orte_dss_buffer_extend(orte_buffer_t *buffer, size_t bytes_to_add) +char* opal_dss_buffer_extend(opal_buffer_t *buffer, size_t bytes_to_add) { size_t required, to_alloc; size_t pack_offset, unpack_offset; @@ -49,13 +43,13 @@ char* orte_dss_buffer_extend(orte_buffer_t *buffer, size_t bytes_to_add) } required = buffer->bytes_used + bytes_to_add; - if(required >= (size_t)orte_dss_threshold_size) { - to_alloc = ((required + orte_dss_threshold_size - 1) - / orte_dss_threshold_size) * orte_dss_threshold_size; + if(required >= (size_t)opal_dss_threshold_size) { + to_alloc = ((required + opal_dss_threshold_size - 1) + / opal_dss_threshold_size) * opal_dss_threshold_size; } else { to_alloc = buffer->bytes_allocated; if(0 == to_alloc) { - to_alloc = orte_dss_initial_size; + to_alloc = opal_dss_initial_size; } while(to_alloc < required) { to_alloc <<= 1; @@ -75,7 +69,6 @@ char* orte_dss_buffer_extend(orte_buffer_t *buffer, size_t bytes_to_add) } if (NULL == buffer->base_ptr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return NULL; } buffer->pack_ptr = ((char*) buffer->base_ptr) + pack_offset; @@ -91,12 +84,11 @@ char* orte_dss_buffer_extend(orte_buffer_t *buffer, size_t bytes_to_add) * Internal function that checks to see if the specified number of bytes * remain in the buffer for unpacking */ -bool orte_dss_too_small(orte_buffer_t *buffer, size_t bytes_reqd) +bool opal_dss_too_small(opal_buffer_t *buffer, size_t bytes_reqd) { size_t bytes_remaining_packed; if (buffer->pack_ptr < buffer->unpack_ptr) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE); return true; } @@ -112,39 +104,29 @@ bool orte_dss_too_small(orte_buffer_t *buffer, size_t bytes_reqd) return false; } -int orte_dss_store_data_type(orte_buffer_t *buffer, orte_data_type_t type) +int opal_dss_store_data_type(opal_buffer_t *buffer, opal_data_type_t type) { - int rc; - orte_dss_type_info_t *info; + opal_dss_type_info_t *info; - /* Lookup the pack function for the actual orte_data_type type and call it */ + /* 
Lookup the pack function for the actual opal_data_type type and call it */ - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, ORTE_DATA_TYPE_T))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, OPAL_DATA_TYPE_T))) { + return OPAL_ERR_PACK_FAILURE; } - if (ORTE_SUCCESS != (rc = info->odti_pack_fn(buffer, &type, 1, ORTE_DATA_TYPE_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; + return info->odti_pack_fn(buffer, &type, 1, OPAL_DATA_TYPE_T); } -int orte_dss_get_data_type(orte_buffer_t *buffer, orte_data_type_t *type) +int opal_dss_get_data_type(opal_buffer_t *buffer, opal_data_type_t *type) { - int rc; - orte_dss_type_info_t *info; - orte_std_cntr_t n=1; + opal_dss_type_info_t *info; + int32_t n=1; - /* Lookup the unpack function for the actual orte_data_type type and call it */ + /* Lookup the unpack function for the actual opal_data_type type and call it */ - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, ORTE_DATA_TYPE_T))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, OPAL_DATA_TYPE_T))) { + return OPAL_ERR_PACK_FAILURE; } - rc = info->odti_unpack_fn(buffer, type, &n, ORTE_DATA_TYPE_T); - - return rc; + return info->odti_unpack_fn(buffer, type, &n, OPAL_DATA_TYPE_T); } diff --git a/orte/dss/dss_load_unload.c b/opal/dss/dss_load_unload.c similarity index 73% rename from orte/dss/dss_load_unload.c rename to opal/dss/dss_load_unload.c index e4d51a553b..c2e9cb98e0 100644 --- a/orte/dss/dss_load_unload.c +++ b/opal/dss/dss_load_unload.c @@ -17,59 +17,47 @@ */ /* - * DPS Buffer Operations - */ - -/** @file: - * + * DSS Buffer Operations */ +#include "opal_config.h" -#include "orte_config.h" - -#include -#ifdef HAVE_NETINET_IN_H -#include -#endif - -#include 
"orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" +#include "opal/dss/dss_internal.h" -int orte_dss_unload(orte_buffer_t *buffer, void **payload, - orte_std_cntr_t *bytes_used) +int opal_dss_unload(opal_buffer_t *buffer, void **payload, + int32_t *bytes_used) { char *hdr_dst = NULL; - orte_dss_buffer_type_t type; + opal_dss_buffer_type_t type; /* check that buffer is not null */ if (!buffer) { - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* were we given someplace to point to the payload */ if (NULL == payload) { - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* anything in the buffer - if not, nothing to do */ if (NULL == buffer->base_ptr || 0 == buffer->bytes_used) { *payload = NULL; *bytes_used = 0; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* add room for our description of the buffer -- currently just the type */ - if (NULL == (hdr_dst = orte_dss_buffer_extend(buffer, - sizeof(orte_dss_buffer_type_t)))) { - return ORTE_ERR_OUT_OF_RESOURCE; + if (NULL == (hdr_dst = opal_dss_buffer_extend(buffer, + sizeof(opal_dss_buffer_type_t)))) { + return OPAL_ERR_OUT_OF_RESOURCE; } /* add the header (at the end, so perhaps it's a footer? 
*/ type = buffer->type; - ORTE_DSS_BUFFER_TYPE_HTON(type); - memcpy(hdr_dst, &type, sizeof(orte_dss_buffer_type_t)); - buffer->bytes_used += sizeof(orte_dss_buffer_type_t); + OPAL_DSS_BUFFER_TYPE_HTON(type); + memcpy(hdr_dst, &type, sizeof(opal_dss_buffer_type_t)); + buffer->bytes_used += sizeof(opal_dss_buffer_type_t); /* okay, we have something to provide - pass it back */ *payload = buffer->base_ptr; @@ -82,24 +70,24 @@ int orte_dss_unload(orte_buffer_t *buffer, void **payload, /* All done */ - return ORTE_SUCCESS; + return OPAL_SUCCESS; } -int orte_dss_load(orte_buffer_t *buffer, void *payload, - orte_std_cntr_t bytes_used) +int opal_dss_load(opal_buffer_t *buffer, void *payload, + int32_t bytes_used) { char *hdr_ptr; - orte_dss_buffer_type_t type; + opal_dss_buffer_type_t type; /* check to see if the buffer has been initialized */ if (NULL == buffer) { - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* check that the payload is there */ if (NULL == payload) { - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* check if buffer already has payload - free it if so */ @@ -108,11 +96,11 @@ int orte_dss_load(orte_buffer_t *buffer, void *payload, } /* get our header */ - hdr_ptr = (char*) payload + bytes_used - sizeof(orte_dss_buffer_type_t); - memcpy(&type, hdr_ptr, sizeof(orte_dss_buffer_type_t)); - ORTE_DSS_BUFFER_TYPE_NTOH(type); + hdr_ptr = (char*) payload + bytes_used - sizeof(opal_dss_buffer_type_t); + memcpy(&type, hdr_ptr, sizeof(opal_dss_buffer_type_t)); + OPAL_DSS_BUFFER_TYPE_NTOH(type); buffer->type = type; - bytes_used -= sizeof(orte_dss_buffer_type_t); + bytes_used -= sizeof(opal_dss_buffer_type_t); /* populate the buffer */ buffer->base_ptr = (char*)payload; @@ -126,7 +114,7 @@ int orte_dss_load(orte_buffer_t *buffer, void *payload, /* All done */ - return ORTE_SUCCESS; + return OPAL_SUCCESS; } @@ -139,14 +127,13 @@ int orte_dss_load(orte_buffer_t *buffer, void *payload, * looks functionally a lot more like a destructive "copy" - both for * 
the source and destination buffers - then a direct transfer of data! */ -int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src) +int opal_dss_xfer_payload(opal_buffer_t *dest, opal_buffer_t *src) { int rc; /* ensure we have valid source and destination */ if (NULL == dest || NULL == src) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* if the dest is already populated, release the data */ @@ -163,8 +150,7 @@ int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src) /* copy the src payload to the dest - this will allocate "fresh" * memory for the unpacked payload remaining in the src buffer */ - if (ORTE_SUCCESS != (rc = orte_dss_copy_payload(dest, src))) { - ORTE_ERROR_LOG(rc); + if (OPAL_SUCCESS != (rc = opal_dss_copy_payload(dest, src))) { return rc; } @@ -174,7 +160,7 @@ int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src) src->pack_ptr = src->unpack_ptr = NULL; src->bytes_allocated = src->bytes_used = 0; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } @@ -182,15 +168,14 @@ int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src) * The complete contents of the src buffer are NOT copied - only that * portion that has not been previously unpacked is copied. 
*/ -int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src) +int opal_dss_copy_payload(opal_buffer_t *dest, opal_buffer_t *src) { char *dst_ptr; - orte_std_cntr_t bytes_left; + int32_t bytes_left; /* ensure we have valid source and destination */ if (NULL == dest || NULL == src) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* if the dest is already populated, check to ensure that both @@ -198,8 +183,7 @@ int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src) */ if (0 != dest->bytes_used) { if (dest->type != src->type) { - ORTE_ERROR_LOG(ORTE_ERR_BUFFER); - return ORTE_ERR_BUFFER; + return OPAL_ERR_BUFFER; } } @@ -219,12 +203,12 @@ int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src) /* if nothing is left, then nothing to do */ if (0 == bytes_left) { - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* add room to the dest for the src buffer's payload */ - if (NULL == (dst_ptr = orte_dss_buffer_extend(dest, bytes_left))) { - return ORTE_ERR_OUT_OF_RESOURCE; + if (NULL == (dst_ptr = opal_dss_buffer_extend(dest, bytes_left))) { + return OPAL_ERR_OUT_OF_RESOURCE; } /* copy the src payload to the specified location in dest */ @@ -234,6 +218,6 @@ int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src) dest->bytes_used += bytes_left; dest->pack_ptr = ((char*)dest->pack_ptr) + bytes_left; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/orte/dss/dss_lookup.c b/opal/dss/dss_lookup.c similarity index 69% rename from orte/dss/dss_lookup.c rename to opal/dss/dss_lookup.c index a2fe8bcfa0..bcb23d3113 100644 --- a/orte/dss/dss_lookup.c +++ b/opal/dss/dss_lookup.c @@ -16,25 +16,16 @@ * $HEADER$ */ -#include "orte_config.h" -#include "orte/orte_types.h" +#include "opal_config.h" -#include "orte/mca/errmgr/errmgr.h" +#include "opal/dss/dss_internal.h" -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - - -char *orte_dss_lookup_data_type(orte_data_type_t 
type) +char *opal_dss_lookup_data_type(opal_data_type_t type) { - orte_dss_type_info_t *info; + opal_dss_type_info_t *info; char *name; - if (!(type < orte_dss_types->size)) { - return NULL; - } - - info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type); + info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type); if (NULL != info) { /* type found on list */ name = strdup(info->odti_name); return name; diff --git a/opal/dss/dss_open_close.c b/opal/dss/dss_open_close.c new file mode 100644 index 0000000000..a10cfa12da --- /dev/null +++ b/opal/dss/dss_open_close.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ +#include "opal_config.h" + +#include "opal/mca/base/mca_base_param.h" + +#include "opal/dss/dss_internal.h" + +/** + * globals + */ +bool opal_dss_initialized = false; +bool opal_dss_debug = false; +int opal_dss_verbose = -1; /* by default disabled */ +int opal_dss_initial_size; +int opal_dss_threshold_size; +opal_pointer_array_t opal_dss_types; +opal_data_type_t opal_dss_num_reg_types; +opal_dss_buffer_type_t default_buf_type; + +opal_dss_t opal_dss = { + opal_dss_set, + opal_dss_get, + opal_dss_arith, + opal_dss_increment, + opal_dss_decrement, + opal_dss_set_buffer_type, + opal_dss_pack, + opal_dss_unpack, + opal_dss_copy, + opal_dss_compare, + opal_dss_size, + opal_dss_print, + opal_dss_release, + opal_dss_peek, + opal_dss_unload, + opal_dss_load, + opal_dss_xfer_payload, + opal_dss_copy_payload, + opal_dss_register, + opal_dss_lookup_data_type, + opal_dss_dump_data_types, + opal_dss_dump, + opal_dss_pack_buffer, + opal_dss_unpack_buffer +}; + +/** + * Object constructors, destructors, and instantiations + */ +/** Data Value **/ +/* constructor - used to initialize state of data value instance */ +static void opal_data_value_construct(opal_dss_value_t* ptr) +{ + ptr->type = OPAL_UNDEF; + ptr->data = NULL; +} +/* destructor - used to release data value instance */ +static void opal_data_value_destruct(opal_dss_value_t* ptr) +{ + if (NULL != ptr->data) { + opal_dss.release(ptr); + } +} + +/* define instance of opal_class_t */ +OBJ_CLASS_INSTANCE( + opal_dss_value_t, /* type name */ + opal_object_t, /* parent "class" name */ + opal_data_value_construct, /* constructor */ + opal_data_value_destruct); /* destructor */ + + +static void opal_buffer_construct (opal_buffer_t* buffer) +{ + /** set the default buffer type */ + buffer->type = default_buf_type; + + /* Make everything NULL to begin with */ + + buffer->base_ptr = buffer->pack_ptr = buffer->unpack_ptr = NULL; + 
buffer->bytes_allocated = buffer->bytes_used = 0; +} + +static void opal_buffer_destruct (opal_buffer_t* buffer) +{ + if (NULL != buffer) { + if (NULL != buffer->base_ptr) { + free (buffer->base_ptr); + } + } +} + +OBJ_CLASS_INSTANCE(opal_buffer_t, + opal_object_t, + opal_buffer_construct, + opal_buffer_destruct); + + +static void opal_dss_type_info_construct(opal_dss_type_info_t *obj) +{ + obj->odti_name = NULL; + obj->odti_pack_fn = NULL; + obj->odti_unpack_fn = NULL; + obj->odti_copy_fn = NULL; + obj->odti_compare_fn = NULL; + obj->odti_size_fn = NULL; + obj->odti_print_fn = NULL; + obj->odti_release_fn = NULL; + obj->odti_structured = false; +} + +static void opal_dss_type_info_destruct(opal_dss_type_info_t *obj) +{ + if (NULL != obj->odti_name) { + free(obj->odti_name); + } +} + +OBJ_CLASS_INSTANCE(opal_dss_type_info_t, opal_object_t, + opal_dss_type_info_construct, + opal_dss_type_info_destruct); + + +int opal_dss_open(void) +{ + char *enviro_val; + int id, rc; + opal_data_type_t tmp; + int def_type; + + if (opal_dss_initialized) { + return OPAL_SUCCESS; + } + + enviro_val = getenv("OPAL_dss_debug"); + if (NULL != enviro_val) { /* debug requested */ + opal_dss_debug = true; + } else { + opal_dss_debug = false; + } + + /** set the default buffer type. If we are in debug mode, then we default + * to fully described buffers. Otherwise, we default to non-described for brevity + * and performance + */ +#if OMPI_ENABLE_DEBUG + def_type = OPAL_DSS_BUFFER_FULLY_DESC; +#else + def_type = OPAL_DSS_BUFFER_NON_DESC; +#endif + + id = mca_base_param_register_int("dss", "buffer", "type", + "Set the default mode for OpenRTE buffers (0=non-described, 1=described)", + def_type); + mca_base_param_lookup_int(id, &rc); + default_buf_type = rc; + + /* setup the initial size of the buffer. 
*/ + id = mca_base_param_register_int("dss", "buffer_initial", "size", NULL, + OPAL_DSS_DEFAULT_INITIAL_SIZE); + mca_base_param_lookup_int(id, &opal_dss_initial_size); + + /* the threshold as to where to stop doubling the size of the buffer + * allocated memory and start doing additive increases */ + id = mca_base_param_register_int("dss", "buffer_threshold", "size", NULL, + OPAL_DSS_DEFAULT_THRESHOLD_SIZE); + mca_base_param_lookup_int(id, &opal_dss_threshold_size); + + /* Setup the types array */ + OBJ_CONSTRUCT(&opal_dss_types, opal_pointer_array_t); + if (OPAL_SUCCESS != (rc = opal_pointer_array_init(&opal_dss_types, + OPAL_DSS_ID_DYNAMIC, + OPAL_DSS_ID_MAX, + OPAL_DSS_ID_MAX))) { + return rc; + } + opal_dss_num_reg_types = 0; + + /* Register all the intrinsic types */ + + tmp = OPAL_NULL; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_null, + opal_dss_unpack_null, + (opal_dss_copy_fn_t)opal_dss_copy_null, + (opal_dss_compare_fn_t)opal_dss_compare_null, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_null, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_NULL", &tmp))) { + return rc; + } + tmp = OPAL_BYTE; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_byte, + opal_dss_unpack_byte, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_byte, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_byte, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_BYTE", &tmp))) { + return rc; + } + tmp = OPAL_BOOL; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_bool, + opal_dss_unpack_bool, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_bool, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_bool, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_BOOL", &tmp))) { + return rc; + } + tmp = 
OPAL_INT; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int, + opal_dss_unpack_int, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT", &tmp))) { + return rc; + } + tmp = OPAL_UINT; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int, + opal_dss_unpack_int, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT", &tmp))) { + return rc; + } + tmp = OPAL_INT8; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_byte, + opal_dss_unpack_byte, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int8, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int8, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT8", &tmp))) { + return rc; + } + tmp = OPAL_UINT8; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_byte, + opal_dss_unpack_byte, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint8, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint8, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT8", &tmp))) { + return rc; + } + tmp = OPAL_INT16; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int16, + opal_dss_unpack_int16, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int16, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int16, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT16", &tmp))) { + return rc; 
+ } + tmp = OPAL_UINT16; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int16, + opal_dss_unpack_int16, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint16, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint16, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT16", &tmp))) { + return rc; + } + tmp = OPAL_INT32; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int32, + opal_dss_unpack_int32, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int32, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int32, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT32", &tmp))) { + return rc; + } + tmp = OPAL_UINT32; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int32, + opal_dss_unpack_int32, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint32, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint32, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT32", &tmp))) { + return rc; + } + tmp = OPAL_INT64; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int64, + opal_dss_unpack_int64, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int64, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int64, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT64", &tmp))) { + return rc; + } + tmp = OPAL_UINT64; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int64, + opal_dss_unpack_int64, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint64, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint64, + (opal_dss_release_fn_t)opal_dss_std_release, + 
OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT64", &tmp))) { + return rc; + } + tmp = OPAL_SIZE; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_sizet, + opal_dss_unpack_sizet, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_size, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_size, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_SIZE", &tmp))) { + return rc; + } + tmp = OPAL_PID; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_pid, + opal_dss_unpack_pid, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_pid, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_pid, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_PID", &tmp))) { + return rc; + } + tmp = OPAL_STRING; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_string, + opal_dss_unpack_string, + (opal_dss_copy_fn_t)opal_dss_copy_string, + (opal_dss_compare_fn_t)opal_dss_compare_string, + (opal_dss_size_fn_t)opal_dss_size_string, + (opal_dss_print_fn_t)opal_dss_print_string, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_STRUCTURED, + "OPAL_STRING", &tmp))) { + return rc; + } + tmp = OPAL_DATA_TYPE; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_data_type, + opal_dss_unpack_data_type, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_dt, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_data_type, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_DATA_TYPE", &tmp))) { + return rc; + } + tmp = OPAL_DATA_VALUE; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_data_value, + opal_dss_unpack_data_value, + (opal_dss_copy_fn_t)opal_dss_copy_data_value, + (opal_dss_compare_fn_t)opal_dss_compare_data_value, + (opal_dss_size_fn_t)opal_dss_size_data_value, + 
(opal_dss_print_fn_t)opal_dss_print_data_value, + (opal_dss_release_fn_t)opal_dss_std_obj_release, + OPAL_DSS_STRUCTURED, + "OPAL_DATA_VALUE", &tmp))) { + return rc; + } + + tmp = OPAL_BYTE_OBJECT; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_byte_object, + opal_dss_unpack_byte_object, + (opal_dss_copy_fn_t)opal_dss_copy_byte_object, + (opal_dss_compare_fn_t)opal_dss_compare_byte_object, + (opal_dss_size_fn_t)opal_dss_size_byte_object, + (opal_dss_print_fn_t)opal_dss_print_byte_object, + (opal_dss_release_fn_t)opal_dss_release_byte_object, + OPAL_DSS_STRUCTURED, + "OPAL_BYTE_OBJECT", &tmp))) { + return rc; + } + + /* All done */ + + return OPAL_SUCCESS; +} + + +int opal_dss_close(void) +{ + int32_t i; + + opal_dss_initialized = false; + + for (i = 0 ; i < opal_pointer_array_get_size(&opal_dss_types) ; ++i) { + opal_dss_type_info_t *info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, i); + if (NULL != info) { + OBJ_RELEASE(info); + } + } + + OBJ_DESTRUCT(&opal_dss_types); + + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_pack.c b/opal/dss/dss_pack.c new file mode 100644 index 0000000000..02f5d4aed3 --- /dev/null +++ b/opal/dss/dss_pack.c @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/types.h" +#include "opal/util/output.h" +#include "opal/dss/dss_internal.h" + +int opal_dss_pack(opal_buffer_t *buffer, const void *src, int32_t num_vals, + opal_data_type_t type) +{ + int rc; + + /* check for error */ + if (NULL == buffer) { + return OPAL_ERR_BAD_PARAM; + } + + /* Pack the number of values */ + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + if (OPAL_SUCCESS != (rc = opal_dss_store_data_type(buffer, OPAL_INT32))) { + return rc; + } + } + if (OPAL_SUCCESS != (rc = opal_dss_pack_int32(buffer, &num_vals, 1, OPAL_INT32))) { + return rc; + } + + /* Pack the value(s) */ + return opal_dss_pack_buffer(buffer, src, num_vals, type); +} + +int opal_dss_pack_buffer(opal_buffer_t *buffer, const void *src, int32_t num_vals, + opal_data_type_t type) +{ + int rc; + opal_dss_type_info_t *info; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_buffer( %p, %p, %lu, %d )\n", + (void*)buffer, src, (long unsigned int)num_vals, (int)type ) ); + + /* Pack the declared data type */ + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + if (OPAL_SUCCESS != (rc = opal_dss_store_data_type(buffer, type))) { + return rc; + } + } + + /* Lookup the pack function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_PACK_FAILURE; + } + + return info->odti_pack_fn(buffer, src, num_vals, type); +} + + +/* PACK FUNCTIONS FOR GENERIC SYSTEM TYPES */ + +/* + * BOOL + */ +int opal_dss_pack_bool(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. If we aren't fully described, then add the + description for this type... 
*/ + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, DSS_TYPE_BOOL))) { + return ret; + } + } + + /* Turn around and pack the real type */ + return opal_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_BOOL); +} + +/* + * INT + */ +int opal_dss_pack_int(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. If we aren't fully described, then add the + description for this type... */ + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, DSS_TYPE_INT))) { + return ret; + } + } + + /* Turn around and pack the real type */ + return opal_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_INT); +} + +/* + * SIZE_T + */ +int opal_dss_pack_sizet(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. If we aren't fully described, then add the + description for this type... */ + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, DSS_TYPE_SIZE_T))) { + return ret; + } + } + + return opal_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_SIZE_T); +} + +/* + * PID_T + */ +int opal_dss_pack_pid(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. If we aren't fully described, then add the + description for this type... 
*/ + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, DSS_TYPE_PID_T))) { + return ret; + } + } + + /* Turn around and pack the real type */ + return opal_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_PID_T); +} + + +/* PACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ + +/* + * NULL + */ +int opal_dss_pack_null(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + char null=0x00; + char *dst; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_null * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, num_vals))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + /* store the nulls */ + memset(dst, (int)null, num_vals); + + /* update buffer pointers */ + buffer->pack_ptr += num_vals; + buffer->bytes_used += num_vals; + + return OPAL_SUCCESS; +} + +/* + * BYTE, CHAR, INT8 + */ +int opal_dss_pack_byte(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + char *dst; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_byte * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, num_vals))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + /* store the data */ + memcpy(dst, src, num_vals); + + /* update buffer pointers */ + buffer->pack_ptr += num_vals; + buffer->bytes_used += num_vals; + + return OPAL_SUCCESS; +} + +/* + * INT16 + */ +int opal_dss_pack_int16(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int32_t i; + uint16_t tmp, *srctmp = (uint16_t*) src; + char *dst; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_int16 * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, num_vals*sizeof(tmp)))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (i = 0; i < num_vals; ++i) { + tmp = htons(srctmp[i]); + 
memcpy(dst, &tmp, sizeof(tmp)); + dst += sizeof(tmp); + } + buffer->pack_ptr += num_vals * sizeof(tmp); + buffer->bytes_used += num_vals * sizeof(tmp); + + return OPAL_SUCCESS; +} + +/* + * INT32 + */ +int opal_dss_pack_int32(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int32_t i; + uint32_t tmp, *srctmp = (uint32_t*) src; + char *dst; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_int32 * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, num_vals*sizeof(tmp)))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (i = 0; i < num_vals; ++i) { + tmp = htonl(srctmp[i]); + memcpy(dst, &tmp, sizeof(tmp)); + dst += sizeof(tmp); + } + buffer->pack_ptr += num_vals * sizeof(tmp); + buffer->bytes_used += num_vals * sizeof(tmp); + + return OPAL_SUCCESS; +} + +/* + * INT64 + */ +int opal_dss_pack_int64(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int32_t i; + uint64_t tmp, *srctmp = (uint64_t*) src; + char *dst; + size_t bytes_packed = num_vals * sizeof(tmp); + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_int64 * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, bytes_packed))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (i = 0; i < num_vals; ++i) { + tmp = hton64(srctmp[i]); + memcpy(dst, &tmp, sizeof(tmp)); + dst += sizeof(tmp); + } + buffer->pack_ptr += bytes_packed; + buffer->bytes_used += bytes_packed; + + return OPAL_SUCCESS; +} + +/* + * STRING + */ +int opal_dss_pack_string(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret = OPAL_SUCCESS; + int32_t i, len; + char **ssrc = (char**) src; + + for (i = 0; i < num_vals; ++i) { + if (NULL == ssrc[i]) { /* got zero-length string/NULL pointer - store NULL */ + len = 0; + if (OPAL_SUCCESS != (ret = opal_dss_pack_int32(buffer, &len, 1, OPAL_INT32))) 
{ + return ret; + } + } else { + len = (int32_t)strlen(ssrc[i]) + 1; + if (OPAL_SUCCESS != (ret = opal_dss_pack_int32(buffer, &len, 1, OPAL_INT32))) { + return ret; + } + if (OPAL_SUCCESS != (ret = + opal_dss_pack_byte(buffer, ssrc[i], len, OPAL_BYTE))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} + +/* PACK FUNCTIONS FOR GENERIC OPAL TYPES */ + +/* + * OPAL_DATA_TYPE + */ +int opal_dss_pack_data_type(opal_buffer_t *buffer, const void *src, int32_t num_vals, + opal_data_type_t type) +{ + int ret; + + /* Turn around and pack the real type */ + if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, src, num_vals, OPAL_DATA_TYPE_T))) { + } + + return ret; +} + +/* + * OPAL_DATA_VALUE + */ +int opal_dss_pack_data_value(opal_buffer_t *buffer, const void *src, int32_t num, opal_data_type_t type) +{ + opal_dss_type_info_t *info; + opal_dss_value_t **sdv; + int32_t i; + int ret; + + sdv = (opal_dss_value_t **) src; + + for (i = 0; i < num; ++i) { + /* if the src data value is NULL, then we will pack it as OPAL_NULL to indicate + * that the unpack should leave a NULL data value + */ + if (NULL == sdv[i]) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, OPAL_NULL))) { + return ret; + } + continue; + } + + /* pack the data type - we'll need it on the other end */ + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, sdv[i]->type))) { + return ret; + } + + /* if the data type is UNDEF, then nothing more to do */ + if (OPAL_UNDEF == sdv[i]->type) continue; + + /* Lookup the pack function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, sdv[i]->type))) { + return OPAL_ERR_PACK_FAILURE; + } + + if (info->odti_structured) { + if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, &(sdv[i]->data), 1, sdv[i]->type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, sdv[i]->data, 1, sdv[i]->type))) { + return ret; + } + } + } + + return 
OPAL_SUCCESS; +} + +/* + * OPAL_BYTE_OBJECT + */ +int opal_dss_pack_byte_object(opal_buffer_t *buffer, const void *src, int32_t num, + opal_data_type_t type) +{ + opal_byte_object_t **sbyteptr; + int32_t i, n; + int ret; + + sbyteptr = (opal_byte_object_t **) src; + + for (i = 0; i < num; ++i) { + n = sbyteptr[i]->size; + if (OPAL_SUCCESS != (ret = opal_dss_pack_int32(buffer, &n, 1, OPAL_INT32))) { + return ret; + } + if (0 < n) { + if (OPAL_SUCCESS != (ret = + opal_dss_pack_byte(buffer, sbyteptr[i]->bytes, n, OPAL_BYTE))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} diff --git a/orte/dss/dss_peek.c b/opal/dss/dss_peek.c similarity index 52% rename from orte/dss/dss_peek.c rename to opal/dss/dss_peek.c index 9688a4a34f..78ec5f38cb 100644 --- a/orte/dss/dss_peek.c +++ b/opal/dss/dss_peek.c @@ -16,115 +16,101 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include "orte/mca/errmgr/errmgr.h" +#include "opal/dss/dss_internal.h" -#include "orte/dss/dss_internal.h" - - -int orte_dss_peek(orte_buffer_t *buffer, orte_data_type_t *type, - orte_std_cntr_t *num_vals) +int opal_dss_peek(opal_buffer_t *buffer, opal_data_type_t *type, + int32_t *num_vals) { int ret; - orte_buffer_t tmp; - orte_std_cntr_t n=1; - orte_data_type_t local_type; + opal_buffer_t tmp; + int32_t n=1; + opal_data_type_t local_type; /* check for errors */ if (buffer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* Double check and ensure that there is data left in the buffer. 
*/ if (buffer->unpack_ptr >= buffer->base_ptr + buffer->bytes_used) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER); - *type = ORTE_NULL; + *type = OPAL_NULL; *num_vals = 0; - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; } /* if this is NOT a fully described buffer, then that is as much as * we can do - there is no way we can tell the caller what type is * in the buffer since that info wasn't stored. */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - *type = ORTE_UNDEF; + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + *type = OPAL_UNDEF; *num_vals = 0; - return ORTE_ERR_UNKNOWN_DATA_TYPE; + return OPAL_ERR_UNKNOWN_DATA_TYPE; } /* cheat: unpack from a copy of the buffer -- leaving all the original pointers intact */ tmp = *buffer; - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(&tmp, &local_type))) { - ORTE_ERROR_LOG(ret); - *type = ORTE_NULL; + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(&tmp, &local_type))) { + *type = OPAL_NULL; *num_vals = 0; return ret; } - if (ORTE_STD_CNTR != local_type) { /* if the length wasn't first, then error */ - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE); - *type = ORTE_NULL; + if (OPAL_INT32 != local_type) { /* if the length wasn't first, then error */ + *type = OPAL_NULL; *num_vals = 0; - return ORTE_ERR_UNPACK_FAILURE; + return OPAL_ERR_UNPACK_FAILURE; } - if (ORTE_SUCCESS != (ret = orte_dss_unpack_std_cntr(&tmp, num_vals, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - *type = ORTE_NULL; + if (OPAL_SUCCESS != (ret = opal_dss_unpack_int32(&tmp, num_vals, &n, OPAL_INT32))) { + *type = OPAL_NULL; *num_vals = 0; return ret; } - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(&tmp, type))) { - ORTE_ERROR_LOG(ret); - *type = ORTE_NULL; + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(&tmp, type))) { + *type = OPAL_NULL; *num_vals = 0; } return ret; } -int orte_dss_peek_type(orte_buffer_t *buffer, 
orte_data_type_t *type) +int opal_dss_peek_type(opal_buffer_t *buffer, opal_data_type_t *type) { int ret; - orte_buffer_t tmp; + opal_buffer_t tmp; /* check for errors */ if (buffer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* if this is NOT a fully described buffer, then there isn't anything * we can do - there is no way we can tell the caller what type is * in the buffer since that info wasn't stored. */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - *type = ORTE_UNDEF; - return ORTE_ERR_UNKNOWN_DATA_TYPE; + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + *type = OPAL_UNDEF; + return OPAL_ERR_UNKNOWN_DATA_TYPE; } /* Double check and ensure that there is data left in the buffer. */ if (buffer->unpack_ptr >= buffer->base_ptr + buffer->bytes_used) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER); - *type = ORTE_UNDEF; - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + *type = OPAL_UNDEF; + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; } /* cheat: unpack from a copy of the buffer -- leaving all the original pointers intact */ tmp = *buffer; - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(&tmp, type))) { - ORTE_ERROR_LOG(ret); - *type = ORTE_UNDEF; + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(&tmp, type))) { + *type = OPAL_UNDEF; return ret; } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/opal/dss/dss_print.c b/opal/dss/dss_print.c new file mode 100644 index 0000000000..e17f86075d --- /dev/null +++ b/opal/dss/dss_print.c @@ -0,0 +1,461 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include + +#include "opal/dss/dss_internal.h" + +int opal_dss_print(char **output, char *prefix, void *src, opal_data_type_t type) +{ + opal_dss_type_info_t *info; + + /* check for error */ + if (NULL == output) { + return OPAL_ERR_BAD_PARAM; + } + + /* Lookup the print function for this type and call it */ + + if(NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNKNOWN_DATA_TYPE; + } + + return info->odti_print_fn(output, prefix, src, type); +} + +/* + * STANDARD PRINT FUNCTIONS FOR SYSTEM TYPES + */ +int opal_dss_print_byte(char **output, char *prefix, uint8_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_BYTE\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_BYTE\tValue: %x", prefx, *src); /* prefx, not prefix: prefix may be NULL here */ + + return OPAL_SUCCESS; +} + +int opal_dss_print_string(char **output, char *prefix, char *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_STRING\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_STRING\tValue: %s", prefx, src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_size(char **output, char *prefix, size_t *src, opal_data_type_t type) +{ + char *prefx; + +
/* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_SIZE\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_SIZE\tValue: %lu", prefx, (unsigned long) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_pid(char **output, char *prefix, pid_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_PID\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_PID\tValue: %lu", prefx, (unsigned long) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_bool(char **output, char *prefix, bool *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_BOOL\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_BOOL\tValue: %s", prefx, *src ?
"TRUE" : "FALSE"); + + return OPAL_SUCCESS; +} + +int opal_dss_print_int(char **output, char *prefix, int *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_INT\tValue: %ld", prefx, (long) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_uint(char **output, char *prefix, int *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_UINT\tValue: %lu", prefx, (unsigned long) (unsigned int) *src); /* src is declared int*; convert via unsigned int so large values are not sign-extended */ + + return OPAL_SUCCESS; +} + +int opal_dss_print_uint8(char **output, char *prefix, uint8_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT8\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_UINT8\tValue: %u", prefx, (unsigned int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_uint16(char **output, char *prefix, uint16_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT16\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_UINT16\tValue: %u", prefx, (unsigned int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_uint32(char **output, char *prefix, uint32_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT32\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_UINT32\tValue: %u", prefx, (unsigned int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_int8(char **output, char *prefix, int8_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT8\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_INT8\tValue: %d", prefx, (int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_int16(char **output, char *prefix, int16_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT16\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_INT16\tValue: %d", prefx, (int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_int32(char **output, char *prefix, int32_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT32\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + +
asprintf(output, "%sData type: OPAL_INT32\tValue: %d", prefx, (int) *src); + + return OPAL_SUCCESS; +} +int opal_dss_print_uint64(char **output, char *prefix, +#ifdef HAVE_INT64_T + uint64_t *src, +#else + void *src, +#endif /* HAVE_INT64_T */ + opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT64\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + +#ifdef HAVE_INT64_T + asprintf(output, "%sData type: OPAL_UINT64\tValue: %llu", prefx, (unsigned long long) *src); /* long may be only 32 bits wide; long long keeps all 64 bits */ +#else + asprintf(output, "%sData type: OPAL_UINT64\tValue: unsupported", prefx); +#endif /* HAVE_INT64_T */ + + return OPAL_SUCCESS; +} + +int opal_dss_print_int64(char **output, char *prefix, +#ifdef HAVE_INT64_T + int64_t *src, +#else + void *src, +#endif /* HAVE_INT64_T */ + opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT64\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + +#ifdef HAVE_INT64_T + asprintf(output, "%sData type: OPAL_INT64\tValue: %lld", prefx, (long long) *src); /* long may be only 32 bits wide; long long keeps all 64 bits */ +#else + asprintf(output, "%sData type: OPAL_INT64\tValue: unsupported", prefx); +#endif /* HAVE_INT64_T */ + + return OPAL_SUCCESS; +} + +int opal_dss_print_null(char **output, char *prefix, void *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_NULL\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_NULL", prefx); + + return OPAL_SUCCESS; +} + + +/*
PRINT FUNCTIONS FOR GENERIC OPAL TYPES */ + +/* + * OPAL_DATA_TYPE + */ +int opal_dss_print_data_type(char **output, char *prefix, opal_data_type_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_DATA_TYPE\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_DATA_TYPE\tValue: %lu", prefx, (unsigned long) *src); + return OPAL_SUCCESS; +} + +/* + * OPAL_DATA_VALUE + */ +int opal_dss_print_data_value(char **output, char *prefix, opal_dss_value_t *src, opal_data_type_t type) +{ + char *pfx, *tmp1, *tmp2; + int rc; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + if (NULL != prefix) { + asprintf(output, "%sData type: OPAL_DATA_VALUE\tValue: NULL pointer", prefix); + } else { + asprintf(output, "Data type: OPAL_DATA_VALUE\tValue: NULL pointer"); + } + return OPAL_SUCCESS; + } + + if (NULL != prefix) { + asprintf(&pfx, "%s\t", prefix); + asprintf(&tmp1, "%sData type: OPAL_DATA_VALUE:\n", prefix); + } else { + asprintf(&tmp1, "Data type: OPAL_DATA_VALUE:\n"); + asprintf(&pfx, "\t"); + } + + /* if data is included, print it */ + if (OPAL_UNDEF == src->type) { /* undefined data type - just report it */ + asprintf(&tmp2, "%sData type: OPAL_UNDEF\tValue: N/A", pfx); + } else if (NULL != src->data) { + if (OPAL_SUCCESS != (rc = opal_dss.print(&tmp2, pfx, src->data, src->type))) { + if (NULL != tmp1) free(tmp1); + if (NULL != pfx) free(pfx); + *output = NULL; + return rc; + } + } else { /* indicate the data field was NULL */ + asprintf(&tmp2, "%sData field is NULL", pfx); + } + + asprintf(output, "%s%s", tmp1, tmp2); + free(tmp1); + free(tmp2); + if (NULL != pfx) free(pfx); + + return OPAL_SUCCESS; +} + +/* + * OPAL_BYTE_OBJECT + */ +int opal_dss_print_byte_object(char **output, char 
*prefix, opal_byte_object_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_BYTE_OBJECT\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_BYTE_OBJECT\tSize: %lu", prefx, (unsigned long) src->size); + + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_register.c b/opal/dss/dss_register.c new file mode 100644 index 0000000000..323b860193 --- /dev/null +++ b/opal/dss/dss_register.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/dss/dss_internal.h" + +int opal_dss_register(opal_dss_pack_fn_t pack_fn, + opal_dss_unpack_fn_t unpack_fn, + opal_dss_copy_fn_t copy_fn, + opal_dss_compare_fn_t compare_fn, + opal_dss_size_fn_t size_fn, + opal_dss_print_fn_t print_fn, + opal_dss_release_fn_t release_fn, + bool structured, + const char *name, opal_data_type_t *type) +{ + opal_dss_type_info_t *info, *ptr; + int32_t i; + + /* Check for bozo cases */ + + if (NULL == pack_fn || NULL == unpack_fn || NULL == copy_fn || NULL == compare_fn || + NULL == size_fn || NULL == print_fn || NULL == name || NULL == type) { + return OPAL_ERR_BAD_PARAM; + } + + /* check if this entry already exists - if so, error - we do NOT allow multiple type registrations */ + for (i=0; i < opal_pointer_array_get_size(&opal_dss_types); i++) { + ptr = opal_pointer_array_get_item(&opal_dss_types, i); + if (NULL != ptr) { + /* check if the name exists */ + if (0 == strcmp(ptr->odti_name, name)) { + return OPAL_ERR_DATA_TYPE_REDEF; + } + /* check if the specified type exists */ + if (*type > 0 && ptr->odti_type == *type) { + return OPAL_ERR_DATA_TYPE_REDEF; + } + } + } + + /* if type is given (i.e., *type > 0), then just use it. 
+ * otherwise, it is an error + */ + if (0 >= *type) { + return OPAL_ERR_BAD_PARAM; + } + + /* Add a new entry to the table */ + info = (opal_dss_type_info_t*) OBJ_NEW(opal_dss_type_info_t); + if (NULL == info) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + info->odti_type = *type; + info->odti_name = strdup(name); + info->odti_pack_fn = pack_fn; + info->odti_unpack_fn = unpack_fn; + info->odti_copy_fn = copy_fn; + info->odti_compare_fn = compare_fn; + info->odti_size_fn = size_fn; + info->odti_print_fn = print_fn; + info->odti_release_fn = release_fn; + info->odti_structured = structured; + + return opal_pointer_array_set_item(&opal_dss_types, *type, info); +} diff --git a/orte/dss/dss_release.c b/opal/dss/dss_release.c similarity index 62% rename from orte/dss/dss_release.c rename to opal/dss/dss_release.c index 1c118c44f0..2614a22c11 100644 --- a/orte/dss/dss_release.c +++ b/opal/dss/dss_release.c @@ -16,33 +16,23 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" +#include "opal/dss/dss_internal.h" #include "opal/util/output.h" -void orte_dss_release(orte_data_value_t *value) +void opal_dss_release(opal_dss_value_t *value) { - orte_dss_type_info_t *info = NULL; + opal_dss_type_info_t *info = NULL; /* check for error */ if (NULL == value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return; } /* Lookup the release function for this type and call it */ - if (!(value->type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, value->type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, value->type))) { return; } @@ -52,7 +42,7 @@ void orte_dss_release(orte_data_value_t *value) /* * STANDARD RELEASE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED */ -void 
orte_dss_std_release(orte_data_value_t *value) +void opal_dss_std_release(opal_dss_value_t *value) { free(value->data); value->data = NULL; @@ -61,20 +51,20 @@ void orte_dss_std_release(orte_data_value_t *value) /* * STANDARD OBJECT RELEASE FUNCTION - WORKS FOR EVERYTHING */ -void orte_dss_std_obj_release(orte_data_value_t *value) +void opal_dss_std_obj_release(opal_dss_value_t *value) { OBJ_RELEASE(value->data); } /* - * ORTE_BYTE_OBJECT + * OPAL_BYTE_OBJECT */ -void orte_dss_release_byte_object(orte_data_value_t *value) +void opal_dss_release_byte_object(opal_dss_value_t *value) { - orte_byte_object_t *bo; + opal_byte_object_t *bo; - bo = (orte_byte_object_t*)value->data; + bo = (opal_byte_object_t*)value->data; free(bo->bytes); free(value->data); diff --git a/orte/dss/dss_set.c b/opal/dss/dss_set.c similarity index 65% rename from orte/dss/dss_set.c rename to opal/dss/dss_set.c index b7af15e68b..26a7592ac1 100644 --- a/orte/dss/dss_set.c +++ b/opal/dss/dss_set.c @@ -14,26 +14,15 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif +#include "opal/dss/dss_internal.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - -int orte_dss_set(orte_data_value_t *value, void *new_value, orte_data_type_t type) +int opal_dss_set(opal_dss_value_t *value, void *new_value, opal_data_type_t type) { /* check for error */ if (NULL == value || NULL == new_value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* see if a value is already loaded - if so, that's just wrong. 
We can't @@ -41,8 +30,7 @@ int orte_dss_set(orte_data_value_t *value, void *new_value, orte_data_type_t typ was stored dynamically */ if (NULL != value->data) { - ORTE_ERROR_LOG(ORTE_ERR_DATA_OVERWRITE_ATTEMPT); - return ORTE_ERR_DATA_OVERWRITE_ATTEMPT; + return OPAL_ERR_DATA_OVERWRITE_ATTEMPT; } /* set the type */ @@ -51,6 +39,6 @@ int orte_dss_set(orte_data_value_t *value, void *new_value, orte_data_type_t typ /* point the value to the data object */ value->data = new_value; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/orte/dss/dss_set_buffer_type.c b/opal/dss/dss_set_buffer_type.c similarity index 63% rename from orte/dss/dss_set_buffer_type.c rename to opal/dss/dss_set_buffer_type.c index 664e2bd208..de29626e6f 100644 --- a/orte/dss/dss_set_buffer_type.c +++ b/opal/dss/dss_set_buffer_type.c @@ -14,37 +14,25 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif +#include "opal/dss/dss_internal.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - -int orte_dss_set_buffer_type(orte_buffer_t *buffer, orte_dss_buffer_type_t type) +int opal_dss_set_buffer_type(opal_buffer_t *buffer, opal_dss_buffer_type_t type) { /** check for error */ if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /** see if the buffer is empty - if not, generate error */ if (buffer->base_ptr != buffer->pack_ptr) { - ORTE_ERROR_LOG(ORTE_ERR_BUFFER); - return ORTE_ERR_BUFFER; + return OPAL_ERR_BUFFER; } /** set the type */ buffer->type = type; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/orte/dss/dss_size.c b/opal/dss/dss_size.c similarity index 50% rename from orte/dss/dss_size.c rename to opal/dss/dss_size.c index fd55c0a386..c132900f57 100644 --- a/orte/dss/dss_size.c +++ b/opal/dss/dss_size.c @@ -16,112 +16,83 @@ * $HEADER$ */ -#include 
"orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif +#include "opal/dss/dss_internal.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" - -int orte_dss_size(size_t *size, void *src, orte_data_type_t type) +int opal_dss_size(size_t *size, void *src, opal_data_type_t type) { - int rc; - orte_dss_type_info_t *info; + opal_dss_type_info_t *info; /* check for error */ if (NULL == size) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* Lookup the size function for this type and call it */ - if (!(type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNKNOWN_DATA_TYPE; } - if (ORTE_SUCCESS != (rc = info->odti_size_fn(size, src, type))) { - ORTE_ERROR_LOG(rc); - } - - - return rc; + return info->odti_size_fn(size, src, type); } /* * STANDARD SIZE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED */ -int orte_dss_std_size(size_t *size, void *src, orte_data_type_t type) +int opal_dss_std_size(size_t *size, void *src, opal_data_type_t type) { switch(type) { - case ORTE_BOOL: + case OPAL_BOOL: *size = sizeof(bool); break; - case ORTE_INT: - case ORTE_UINT: + case OPAL_INT: + case OPAL_UINT: *size = sizeof(int); break; - case ORTE_SIZE: + case OPAL_SIZE: *size = sizeof(size_t); break; - case ORTE_PID: + case OPAL_PID: *size = sizeof(pid_t); break; - case ORTE_BYTE: - case ORTE_INT8: - case ORTE_UINT8: - case ORTE_NULL: + case OPAL_BYTE: + case OPAL_INT8: + case OPAL_UINT8: + case OPAL_NULL: *size = 1; break; - case ORTE_INT16: - case ORTE_UINT16: + case OPAL_INT16: + case OPAL_UINT16: *size = sizeof(uint16_t); break; - case ORTE_INT32: - case ORTE_UINT32: + 
case OPAL_INT32: + case OPAL_UINT32: *size = sizeof(uint32_t); break; - case ORTE_INT64: - case ORTE_UINT64: + case OPAL_INT64: + case OPAL_UINT64: *size = sizeof(uint64_t); break; - case ORTE_STD_CNTR: - *size = sizeof(orte_std_cntr_t); + case OPAL_DATA_TYPE: + *size = sizeof(opal_data_type_t); break; - case ORTE_DATA_TYPE: - *size = sizeof(orte_data_type_t); - break; - -#if OPAL_ENABLE_FT == 1 - case ORTE_CKPT_CMD: - *size = sizeof(size_t); - break; -#endif - default: *size = 0; - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; + return OPAL_ERR_UNKNOWN_DATA_TYPE; } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* SIZE FUNCTIONS FOR NON-STANDARD SYSTEM TYPES */ @@ -129,7 +100,7 @@ int orte_dss_std_size(size_t *size, void *src, orte_data_type_t type) /* * STRING */ -int orte_dss_size_string(size_t *size, char *src, orte_data_type_t type) +int opal_dss_size_string(size_t *size, char *src, opal_data_type_t type) { if (NULL != src) { *size = strlen(src) + 1; @@ -137,48 +108,47 @@ int orte_dss_size_string(size_t *size, char *src, orte_data_type_t type) *size = sizeof(char*); /* account for NULL */ } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } -/* SIZE FUNCTIONS FOR GENERIC ORTE TYPES */ +/* SIZE FUNCTIONS FOR GENERIC OPAL TYPES */ /* - * ORTE_DATA_VALUE + * OPAL_DATA_VALUE */ -int orte_dss_size_data_value(size_t *size, orte_data_value_t *src, orte_data_type_t type) +int opal_dss_size_data_value(size_t *size, opal_dss_value_t *src, opal_data_type_t type) { size_t data_size; int rc; /* account for size of object itself... 
*/ - *size = sizeof(orte_data_value_t); + *size = sizeof(opal_dss_value_t); if (NULL != src) { /* ...and the number of bytes in the payload, IF an actual object was provided */ - if (ORTE_SUCCESS != (rc = orte_dss.size(&data_size, src->data, src->type))) { - ORTE_ERROR_LOG(rc); + if (OPAL_SUCCESS != (rc = opal_dss.size(&data_size, src->data, src->type))) { return rc; } *size += data_size; } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* - * ORTE_BYTE_OBJECT + * OPAL_BYTE_OBJECT */ -int orte_dss_size_byte_object(size_t *size, orte_byte_object_t *src, orte_data_type_t type) +int opal_dss_size_byte_object(size_t *size, opal_byte_object_t *src, opal_data_type_t type) { /* account for size of object itself... */ - *size = sizeof(orte_byte_object_t); + *size = sizeof(opal_byte_object_t); if (NULL != src) { /* ...and the number of bytes in the payload, IF an actual object was provided */ *size += src->size; } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/opal/dss/dss_types.h b/opal/dss/dss_types.h new file mode 100644 index 0000000000..2722c7fa20 --- /dev/null +++ b/opal/dss/dss_types.h @@ -0,0 +1,147 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Buffer management types. 
+ */ + +#ifndef OPAL_DSS_TYPES_H_ +#define OPAL_DSS_TYPES_H_ + +#include "opal_config.h" +#include "opal/types.h" + +#include "opal/class/opal_object.h" + +BEGIN_C_DECLS + +typedef uint8_t opal_data_type_t; /** data type indicators */ +#define OPAL_DATA_TYPE_T OPAL_UINT8 +#define OPAL_DSS_ID_MAX UINT8_MAX +#define OPAL_DSS_ID_INVALID OPAL_DSS_ID_MAX + +/* define a structure to hold generic byte objects */ +typedef struct { + int32_t size; + uint8_t *bytes; +} opal_byte_object_t; + +/* Type defines for packing and unpacking */ +#define OPAL_UNDEF (opal_data_type_t) 0 /**< type hasn't been defined yet */ +#define OPAL_BYTE (opal_data_type_t) 1 /**< a byte of data */ +#define OPAL_BOOL (opal_data_type_t) 2 /**< boolean */ +#define OPAL_STRING (opal_data_type_t) 3 /**< a NULL terminated string */ +#define OPAL_SIZE (opal_data_type_t) 4 /**< the generic size_t */ +#define OPAL_PID (opal_data_type_t) 5 /**< process pid */ + /* all the integer flavors */ +#define OPAL_INT (opal_data_type_t) 6 /**< generic integer */ +#define OPAL_INT8 (opal_data_type_t) 7 /**< an 8-bit integer */ +#define OPAL_INT16 (opal_data_type_t) 8 /**< a 16-bit integer */ +#define OPAL_INT32 (opal_data_type_t) 9 /**< a 32-bit integer */ +#define OPAL_INT64 (opal_data_type_t) 10 /**< a 64-bit integer */ + /* all the unsigned integer flavors */ +#define OPAL_UINT (opal_data_type_t) 11 /**< generic unsigned integer */ +#define OPAL_UINT8 (opal_data_type_t) 12 /**< an 8-bit unsigned integer */ +#define OPAL_UINT16 (opal_data_type_t) 13 /**< a 16-bit unsigned integer */ +#define OPAL_UINT32 (opal_data_type_t) 14 /**< a 32-bit unsigned integer */ +#define OPAL_UINT64 (opal_data_type_t) 15 /**< a 64-bit unsigned integer */ + /* we don't support floating point types */ + /* General types */ +#define OPAL_BYTE_OBJECT (opal_data_type_t) 16 /**< byte object structure */ +#define OPAL_DATA_TYPE (opal_data_type_t) 17 /**< data type */ +#define OPAL_NULL (opal_data_type_t) 18 /**< don't interpret data type */ 
+#define OPAL_DATA_VALUE (opal_data_type_t) 19 /**< data value */ + +#define OPAL_DSS_ID_DYNAMIC (opal_data_type_t) 20 + +/* define the results values for comparisons so we can change them in only one place */ +#define OPAL_VALUE1_GREATER +1 +#define OPAL_VALUE2_GREATER -1 +#define OPAL_EQUAL 0 + +/* define arithmetic operations for readability */ +typedef uint8_t opal_dss_arith_op_t; + +#define OPAL_DSS_ADD 1 +#define OPAL_DSS_SUB 2 +#define OPAL_DSS_MUL 3 +#define OPAL_DSS_DIV 4 + + +/* Data value object */ +typedef struct { + opal_object_t super; /* required for this to be an object */ + opal_data_type_t type; /* the type of value stored */ + void *data; +} opal_dss_value_t; +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_dss_value_t); + +#define OPAL_DATA_VALUE_EMPTY { OPAL_OBJ_STATIC_INIT(opal_dss_value_t), OPAL_UNDEF, NULL} + +/* structured-unstructured data flags */ +#define OPAL_DSS_STRUCTURED true +#define OPAL_DSS_UNSTRUCTURED false + +/** + * buffer type + */ +typedef uint8_t opal_dss_buffer_type_t; +#define OPAL_DSS_BUFFER_NON_DESC 0x00 +#define OPAL_DSS_BUFFER_FULLY_DESC 0x01 + +#define OPAL_DSS_BUFFER_TYPE_HTON(h); +#define OPAL_DSS_BUFFER_TYPE_NTOH(h); + +/** + * Structure for holding a buffer to be used with the RML or OOB + * subsystems. 
+ */ + struct opal_buffer_t { + /** First member must be the object's parent */ + opal_object_t parent; + /** type of buffer */ + opal_dss_buffer_type_t type; + /** Start of my memory */ + char *base_ptr; + /** Where the next data will be packed to (within the allocated + memory starting at base_ptr) */ + char *pack_ptr; + /** Where the next data will be unpacked from (within the + allocated memory starting as base_ptr) */ + char *unpack_ptr; + + /** Number of bytes allocated (starting at base_ptr) */ + size_t bytes_allocated; + /** Number of bytes used by the buffer (i.e., amount of data -- + including overhead -- packed in the buffer) */ + size_t bytes_used; + }; + /** + * Convenience typedef + */ + typedef struct opal_buffer_t opal_buffer_t; + + /** formalize the declaration */ + OPAL_DECLSPEC OBJ_CLASS_DECLARATION (opal_buffer_t); + +END_C_DECLS + +#endif /* OPAL_DSS_TYPES_H */ diff --git a/opal/dss/dss_unpack.c b/opal/dss/dss_unpack.c new file mode 100644 index 0000000000..79602fef12 --- /dev/null +++ b/opal/dss/dss_unpack.c @@ -0,0 +1,510 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/util/output.h" +#include "opal/dss/dss_internal.h" + +int opal_dss_unpack(opal_buffer_t *buffer, void *dst, int32_t *num_vals, + opal_data_type_t type) +{ + int rc, ret; + int32_t local_num, n=1; + opal_data_type_t local_type; + + /* check for error */ + if (NULL == buffer || NULL == dst || NULL == num_vals) { + return OPAL_ERR_BAD_PARAM; + } + + /* if user provides a zero for num_vals, then there is no storage allocated + * so return an appropriate error + */ + if (0 == *num_vals) { + return OPAL_ERR_UNPACK_INADEQUATE_SPACE; + } + + /** Unpack the declared number of values + * REMINDER: it is possible that the buffer is corrupted and that + * the DSS will *think* there is a proper int32_t variable at the + * beginning of the unpack region - but that the value is bogus (e.g., just + * a byte field in a string array that so happens to have a value that + * matches the int32_t data type flag). Therefore, this error check is + * NOT completely safe. This is true for ALL unpack functions, not just + * int32_t as used here. + */ + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + if (OPAL_SUCCESS != ( + rc = opal_dss_get_data_type(buffer, &local_type))) { + *num_vals = 0; + return rc; + } + if (OPAL_INT32 != local_type) { /* if the length wasn't first, then error */ + *num_vals = 0; + return OPAL_ERR_UNPACK_FAILURE; + } + } + + n=1; + if (OPAL_SUCCESS != (rc = opal_dss_unpack_int32(buffer, &local_num, &n, OPAL_INT32))) { + *num_vals = 0; + return rc; + } + + /** if the storage provided is inadequate, set things up + * to unpack as much as we can and to return an error code + * indicating that everything was not unpacked - the buffer + * is left in a state where it can not be further unpacked. 
+ */ + if (local_num > *num_vals) { + local_num = *num_vals; + ret = OPAL_ERR_UNPACK_INADEQUATE_SPACE; + } else { /** enough or more than enough storage */ + *num_vals = local_num; /** let the user know how many we actually unpacked */ + ret = OPAL_SUCCESS; + } + + /** Unpack the value(s) */ + if (OPAL_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dst, &local_num, type))) { + *num_vals = 0; + ret = rc; + } + + return ret; +} + +int opal_dss_unpack_buffer(opal_buffer_t *buffer, void *dst, int32_t *num_vals, + opal_data_type_t type) +{ + int rc; + opal_data_type_t local_type; + opal_dss_type_info_t *info; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_buffer( %p, %p, %lu, %d )\n", + (void*)buffer, dst, (long unsigned int)*num_vals, (int)type ) ); + + /** Unpack the declared data type */ + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + if (OPAL_SUCCESS != (rc = opal_dss_get_data_type(buffer, &local_type))) { + return rc; + } + /* if the data types don't match, then return an error */ + if (type != local_type) { + return OPAL_ERR_PACK_MISMATCH; + } + } + + /* Lookup the unpack function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNPACK_FAILURE; + } + + return info->odti_unpack_fn(buffer, dst, num_vals, type); +} + + +/* UNPACK GENERIC SYSTEM TYPES */ + +/* + * BOOL + */ +int opal_dss_unpack_bool(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + opal_data_type_t remote_type; + + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + /* see what type was actually packed */ + if (OPAL_SUCCESS != (ret = opal_dss_peek_type(buffer, &remote_type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &remote_type))) { + return ret; + } + } + + if (remote_type == DSS_TYPE_BOOL) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if 
(OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_BOOL))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(bool, remote_type, ret); + } + return ret; +} + +/* + * INT + */ +int opal_dss_unpack_int(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + opal_data_type_t remote_type; + + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + /* see what type was actually packed */ + if (OPAL_SUCCESS != (ret = opal_dss_peek_type(buffer, &remote_type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &remote_type))) { + return ret; + } + } + + if (remote_type == DSS_TYPE_INT) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_INT))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(int, remote_type, ret); + } + + return ret; +} + +/* + * SIZE_T + */ +int opal_dss_unpack_sizet(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + opal_data_type_t remote_type; + + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + /* see what type was actually packed */ + if (OPAL_SUCCESS != (ret = opal_dss_peek_type(buffer, &remote_type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &remote_type))) { + return ret; + } + } + + if (remote_type == DSS_TYPE_SIZE_T) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_SIZE_T))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(size_t, remote_type, ret); + } + + return ret; +} + +/* + * PID_T + */ +int opal_dss_unpack_pid(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, 
opal_data_type_t type) +{ + int ret; + opal_data_type_t remote_type; + + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + /* see what type was actually packed */ + if (OPAL_SUCCESS != (ret = opal_dss_peek_type(buffer, &remote_type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &remote_type))) { + return ret; + } + } + + if (remote_type == DSS_TYPE_PID_T) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_PID_T))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(pid_t, remote_type, ret); + } + + return ret; +} + + +/* UNPACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ + +/* + * NULL + */ +int opal_dss_unpack_null(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_null * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, *num_vals)) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + memcpy(dest, buffer->unpack_ptr, *num_vals); + + /* update buffer pointer */ + buffer->unpack_ptr += *num_vals; + + return OPAL_SUCCESS; +} + +/* + * BYTE, CHAR, INT8 + */ +int opal_dss_unpack_byte(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_byte * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, *num_vals)) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + memcpy(dest, buffer->unpack_ptr, *num_vals); + + /* update buffer pointer */ + buffer->unpack_ptr += *num_vals; + + return OPAL_SUCCESS; +} + +int opal_dss_unpack_int16(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int32_t i; + 
uint16_t tmp, *desttmp = (uint16_t*) dest; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_int16 * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); + desttmp[i] = ntohs(tmp); + buffer->unpack_ptr += sizeof(tmp); + } + + return OPAL_SUCCESS; +} + +int opal_dss_unpack_int32(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int32_t i; + uint32_t tmp, *desttmp = (uint32_t*) dest; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_int32 * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); + desttmp[i] = ntohl(tmp); + buffer->unpack_ptr += sizeof(tmp); + } + + return OPAL_SUCCESS; +} + +int opal_dss_unpack_int64(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int32_t i; + uint64_t tmp, *desttmp = (uint64_t*) dest; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_int64 * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); + desttmp[i] = ntoh64(tmp); + buffer->unpack_ptr += sizeof(tmp); + } + + return OPAL_SUCCESS; +} + +int opal_dss_unpack_string(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + int32_t i, len, n=1; + char **sdest = (char**) dest; + + for (i = 0; i < 
(*num_vals); ++i) { + if (OPAL_SUCCESS != (ret = opal_dss_unpack_int32(buffer, &len, &n, OPAL_INT32))) { + return ret; + } + if (0 == len) { /* zero-length string - unpack the NULL */ + sdest[i] = NULL; + } else { + sdest[i] = (char*)malloc(len); + if (NULL == sdest[i]) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + if (OPAL_SUCCESS != (ret = opal_dss_unpack_byte(buffer, sdest[i], &len, OPAL_BYTE))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} + + +/* UNPACK FUNCTIONS FOR GENERIC OPAL TYPES */ + +/* + * OPAL_DATA_TYPE + */ +int opal_dss_unpack_data_type(opal_buffer_t *buffer, void *dest, int32_t *num_vals, + opal_data_type_t type) +{ + /* turn around and unpack the real type */ + return opal_dss_unpack_buffer(buffer, dest, num_vals, OPAL_DATA_TYPE_T); +} + +/* + * OPAL_DATA_VALUE + */ +int opal_dss_unpack_data_value(opal_buffer_t *buffer, void *dest, int32_t *num, + opal_data_type_t type) +{ + opal_dss_type_info_t *info; + opal_dss_value_t **ddv; + int32_t i, n; + opal_data_type_t dt; + size_t nsize; + int ret; + + ddv = (opal_dss_value_t **) dest; + + for (i = 0; i < *num; ++i) { + /* see what the data type is */ + n = 1; + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &dt))) { + return ret; + } + + /* if it is OPAL_NULL, then do nothing */ + if (OPAL_NULL == dt) continue; + + /* otherwise, allocate the new object and set the type */ + + ddv[i] = OBJ_NEW(opal_dss_value_t); + if (NULL == ddv[i]) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + ddv[i]->type = dt; + + /* if it is UNDEF, then nothing more to do */ + if (OPAL_UNDEF == ddv[i]->type) continue; + + /* get enough memory to hold it */ + if (OPAL_SUCCESS != (ret = opal_dss.size(&nsize, NULL, ddv[i]->type))) { + return ret; + } + ddv[i]->data = (void*)malloc(nsize); + if (NULL == ddv[i]->data) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + /* Lookup the unpack function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, 
ddv[i]->type))) { + return OPAL_ERR_PACK_FAILURE; + } + + if (info->odti_structured) { + n=1; + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, &(ddv[i]->data), &n, ddv[i]->type))) { + return ret; + } + } else { + n=1; + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, ddv[i]->data, &n, ddv[i]->type))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} + + +/* + * OPAL_BYTE_OBJECT + */ +int opal_dss_unpack_byte_object(opal_buffer_t *buffer, void *dest, int32_t *num, + opal_data_type_t type) +{ + int ret; + int32_t i, n, m=1; + opal_byte_object_t **dbyteptr; + + dbyteptr = (opal_byte_object_t**)dest; + n = *num; + for(i=0; isize), &m, OPAL_INT32))) { + return ret; + } + if (0 < dbyteptr[i]->size) { + dbyteptr[i]->bytes = (uint8_t*)malloc(dbyteptr[i]->size); + if (NULL == dbyteptr[i]->bytes) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + if (OPAL_SUCCESS != (ret = opal_dss_unpack_byte(buffer, (dbyteptr[i]->bytes), + &(dbyteptr[i]->size), OPAL_BYTE))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index be2b69275b..6d85734a34 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -46,7 +46,18 @@ enum { OPAL_ERR_VALUE_OUT_OF_BOUNDS = (OPAL_ERR_BASE - 18), OPAL_ERR_FILE_READ_FAILURE = (OPAL_ERR_BASE - 19), OPAL_ERR_FILE_WRITE_FAILURE = (OPAL_ERR_BASE - 20), - OPAL_ERR_FILE_OPEN_FAILURE = (OPAL_ERR_BASE - 21) + OPAL_ERR_FILE_OPEN_FAILURE = (OPAL_ERR_BASE - 21), + OPAL_ERR_PACK_MISMATCH = (OPAL_ERR_BASE - 22), + OPAL_ERR_PACK_FAILURE = (OPAL_ERR_BASE - 23), + OPAL_ERR_UNPACK_FAILURE = (OPAL_ERR_BASE - 24), + OPAL_ERR_UNPACK_INADEQUATE_SPACE = (OPAL_ERR_BASE - 25), + OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER = (OPAL_ERR_BASE - 26), + OPAL_ERR_TYPE_MISMATCH = (OPAL_ERR_BASE - 27), + OPAL_ERR_OPERATION_UNSUPPORTED = (OPAL_ERR_BASE - 28), + OPAL_ERR_UNKNOWN_DATA_TYPE = (OPAL_ERR_BASE - 29), + OPAL_ERR_BUFFER = (OPAL_ERR_BASE - 30), + 
OPAL_ERR_DATA_TYPE_REDEF = (OPAL_ERR_BASE - 31), + OPAL_ERR_DATA_OVERWRITE_ATTEMPT = (OPAL_ERR_BASE - 32) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/mca/base/base.h b/opal/mca/base/base.h index 5fe72108ed..23b172cc1c 100644 --- a/opal/mca/base/base.h +++ b/opal/mca/base/base.h @@ -108,6 +108,7 @@ OPAL_DECLSPEC int mca_base_component_compare(const mca_base_component_t *a, const mca_base_component_t *b); OPAL_DECLSPEC int mca_base_component_compatible(const mca_base_component_t *a, const mca_base_component_t *b); +OPAL_DECLSPEC char * mca_base_component_to_string(const mca_base_component_t *a); /* mca_base_component_find.c */ diff --git a/opal/mca/base/mca_base_component_compare.c b/opal/mca/base/mca_base_component_compare.c index 34b8a59689..5e89028fdc 100644 --- a/opal/mca/base/mca_base_component_compare.c +++ b/opal/mca/base/mca_base_component_compare.c @@ -136,4 +136,17 @@ int mca_base_component_compatible( return 0; } +/** + * Returns a string which represents the component name and version. 
+ * Has the form: comp_type.comp_name.major_version.minor_version + */ +char * mca_base_component_to_string(const mca_base_component_t *a) { + char * str = NULL; + if(0 > asprintf(&str, "%s.%s.%d.%d", a->mca_type_name, + a->mca_component_name, a->mca_component_major_version, + a->mca_component_minor_version)) { + return NULL; + } + return str; +} diff --git a/opal/runtime/opal_finalize.c b/opal/runtime/opal_finalize.c index 0cb4f6aee7..ae72421a6c 100644 --- a/opal/runtime/opal_finalize.c +++ b/opal/runtime/opal_finalize.c @@ -21,6 +21,7 @@ #include "opal_config.h" #include "opal/class/opal_object.h" +#include "opal/dss/dss.h" #include "opal/util/trace.h" #include "opal/util/output.h" #include "opal/util/malloc.h" @@ -81,6 +82,9 @@ opal_finalize_util(void) the malloc code turning off doesn't affect opal_output that much */ opal_output_finalize(); + + /* close the dss */ + opal_dss_close(); /* finalize the class/object system */ opal_class_finalize(); diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 5db5528879..e814fc11e5 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -36,6 +36,7 @@ #include "opal/mca/paffinity/base/base.h" #include "opal/mca/timer/base/base.h" #include "opal/mca/memchecker/base/base.h" +#include "opal/dss/dss.h" #include "opal/runtime/opal_cr.h" #include "opal/mca/crs/base/base.h" @@ -133,6 +134,36 @@ opal_err2str(int errnum) case OPAL_ERR_FILE_OPEN_FAILURE: retval = "File open failure"; break; + case OPAL_ERR_PACK_MISMATCH: + retval = "Pack data mismatch"; + break; + case OPAL_ERR_PACK_FAILURE: + retval = "Data pack failed"; + break; + case OPAL_ERR_UNPACK_FAILURE: + retval = "Data unpack failed"; + break; + case OPAL_ERR_UNPACK_INADEQUATE_SPACE: + retval = "Data unpack had inadequate space"; + break; + case OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER: + retval = "Data unpack would read past end of buffer"; + break; + case OPAL_ERR_OPERATION_UNSUPPORTED: + retval = "Requested operation is not supported on 
referenced data type"; + break; + case OPAL_ERR_UNKNOWN_DATA_TYPE: + retval = "Unknown data type"; + break; + case OPAL_ERR_BUFFER: + retval = "Buffer error"; + break; + case OPAL_ERR_DATA_TYPE_REDEF: + retval = "Attempt to redefine an existing data type"; + break; + case OPAL_ERR_DATA_OVERWRITE_ATTEMPT: + retval = "Attempt to overwrite a data value"; + break; default: retval = NULL; } @@ -212,6 +243,13 @@ opal_init_util(void) goto return_error; } + /* + * Initialize the data storage service. + */ + if (OPAL_SUCCESS != (ret = opal_dss_open())) { + error = "opal_dss_open"; + goto return_error; + } return OPAL_SUCCESS; return_error: diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am index 2b6cb68274..738879b7f1 100644 --- a/opal/util/Makefile.am +++ b/opal/util/Makefile.am @@ -43,13 +43,14 @@ headers = \ numtostr.h \ num_procs.h \ opal_environ.h \ + opal_getcwd.h \ + opal_pty.h \ os_dirpath.h \ os_path.h \ output.h \ path.h \ pow2.h \ printf.h \ - opal_pty.h \ qsort.h \ show_help.h \ show_help_lex.h \ @@ -75,13 +76,14 @@ libopalutil_la_SOURCES = \ numtostr.c \ num_procs.c \ opal_environ.c \ + opal_getcwd.c \ + opal_pty.c \ os_dirpath.c \ os_path.c \ output.c \ path.c \ pow2.c \ printf.c \ - opal_pty.c \ qsort.c \ show_help.c \ show_help_lex.l \ diff --git a/opal/util/opal_getcwd.c b/opal/util/opal_getcwd.c new file mode 100644 index 0000000000..14f454ec17 --- /dev/null +++ b/opal/util/opal_getcwd.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_STRING_H +#include +#endif + +#include "opal/util/opal_getcwd.h" +#include "opal/constants.h" + + +/* + * Use $PWD instead of getcwd() a) if $PWD exists and b) is a valid + * synonym for the results from getcwd(). 
If both of these conditions + * are not met, just fall back and use the results of getcwd(). + */ +int opal_getcwd(char *buf, size_t size) +{ + char cwd[OMPI_PATH_MAX]; + char *pwd = getenv("PWD"); + struct stat a, b; + + /* Bozo checks (e.g., if someone accidentally passed -1 to the + unsigned "size" param) */ + if (NULL == buf || size > INT_MAX) { + return OPAL_ERR_BAD_PARAM; + } + + /* Call getcwd() to get a baseline result */ + if (NULL == getcwd(cwd, sizeof(cwd))) { + return OPAL_ERR_IN_ERRNO; + } + +#if !defined(HAVE_SYS_STAT_H) + /* If we don't have stat(), then we can't tell if the $PWD and cwd + are the same, so just fall back to getcwd(). */ + pwd = cwd; +#else + if (NULL == pwd) { + pwd = cwd; + } else { + /* If the two are not the same value, figure out if they are + pointing to the same place */ + if (0 != strcmp(pwd, cwd)) { + /* If we can't stat() what getcwd() gave us, give up */ + if (0 != stat(cwd, &a)) { + return OPAL_ERR_IN_ERRNO; + } + /* If we can't stat() $PWD, then $PWD could just be stale + -- so ignore it. */ + else if (0 != stat(pwd, &b)) { + pwd = cwd; + } + /* Otherwise, we successfully stat()'ed them both, so + compare. If either the device or inode is not the + same, then fallback to getcwd(). */ + else { + if (a.st_dev != b.st_dev || a.st_ino != b.st_ino) { + pwd = cwd; + } + } + } + } +#endif + + /* If we got here, pwd is pointing to the result that we want to + give. Ensure the user's buffer is long enough. If it is, copy + in the value and be done. */ + if (strlen(pwd) > size) { + return OPAL_ERR_TEMP_OUT_OF_RESOURCE; + } + strcpy(buf, pwd); + return OPAL_SUCCESS; +} diff --git a/opal/util/opal_getcwd.h b/opal/util/opal_getcwd.h new file mode 100644 index 0000000000..beb0909c56 --- /dev/null +++ b/opal/util/opal_getcwd.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * Per https://svn.open-mpi.org/trac/ompi/ticket/933, use a + * combination of $PWD and getcwd() to find the current working + * directory. + */ + +#ifndef OPAL_GETCWD_H +#define OPAL_GETCWD_H + +BEGIN_C_DECLS + +/** + * Per https://svn.open-mpi.org/trac/ompi/ticket/933, use a + * combination of $PWD and getcwd() to find the current working + * directory. + * + * Use $PWD instead of getcwd() a) if $PWD exists and b) is a valid + * synonym for the results from getcwd(). If both of these conditions + * are not met, just fall back and use the results of getcwd(). + * + * @param buf Caller-allocated buffer to put the result + * @param size Length of the buf array + * + * @retval OPAL_ERR_OUT_OF_RESOURCE If internal malloc() fails. + * @retval OPAL_ERR_TEMP_OUT_OF_RESOURCE If the supplied buf buffer + * was not long enough to handle the result. + * @retval OPAL_ERR_BAD_PARAM If buf is NULL or size>INT_MAX + * @retval OPAL_ERR_IN_ERRNO If an other error occurred + * @retval OPAL_SUCCESS If all went well and a valid value was placed + * in the buf buffer. 
+ */ +OPAL_DECLSPEC int opal_getcwd(char *buf, size_t size); + + +END_C_DECLS + +#endif /* OPAL_GETCWD_H */ diff --git a/opal/util/output.c b/opal/util/output.c index c20b539385..85174f730c 100644 --- a/opal/util/output.c +++ b/opal/util/output.c @@ -726,3 +726,12 @@ static void output(int output_id, const char *format, va_list arglist) free(str); } } + +int opal_output_get_verbosity(int output_id) +{ + if (output_id >= 0 && output_id < OPAL_OUTPUT_MAX_STREAMS && info[output_id].ldi_used) { + return info[output_id].ldi_verbose_level; + } else { + return -1; + } +} diff --git a/opal/util/output.h b/opal/util/output.h index 46dddb9c0c..12423715fa 100644 --- a/opal/util/output.h +++ b/opal/util/output.h @@ -73,9 +73,7 @@ #include "opal/class/opal_object.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /** * \class opal_output_stream_t @@ -392,6 +390,14 @@ struct opal_output_stream_t { */ OPAL_DECLSPEC void opal_output_set_verbosity(int output_id, int level); + /** + * Get the verbosity level for a stream + * + * @param output_id Stream id returned from opal_output_open() + * @returns Verbosity of stream + */ + OPAL_DECLSPEC int opal_output_get_verbosity(int output_id); + /** * Set characteristics for output files. * @@ -485,9 +491,7 @@ struct opal_output_stream_t { */ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_output_stream_t); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif /* OPAL_OUTPUT_H_ */ diff --git a/opal/util/printf.h b/opal/util/printf.h index a23cacd669..bca3a58d75 100644 --- a/opal/util/printf.h +++ b/opal/util/printf.h @@ -29,10 +29,7 @@ #include #include -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - +BEGIN_C_DECLS /** * Writes to a string under the control of a format string @@ -127,9 +124,7 @@ OPAL_DECLSPEC int opal_asprintf(char **ptr, const char *fmt, ...) 
__opal_attrib OPAL_DECLSPEC int opal_vasprintf(char **ptr, const char *fmt, va_list ap) __opal_attribute_format__(__printf__, 2, 0); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif /* OPAL_PRINTF_H */ diff --git a/orte/Makefile.am b/orte/Makefile.am index 70017ea238..fd6512a536 100644 --- a/orte/Makefile.am +++ b/orte/Makefile.am @@ -35,7 +35,6 @@ DIST_SUBDIRS = \ lib_LTLIBRARIES = libopen-rte.la libopen_rte_la_SOURCES = libopen_rte_la_LIBADD = \ - dss/libdss.la \ $(MCA_orte_FRAMEWORK_LIBS) \ $(top_ompi_builddir)/opal/libopen-pal.la libopen_rte_la_DEPENDENCIES = $(libopen_rte_la_LIBADD) @@ -58,7 +57,6 @@ ortedir = $(includedir) endif include class/Makefile.am -include dss/Makefile.am include runtime/Makefile.am include util/Makefile.am include tools/Makefile.am diff --git a/orte/class/orte_pointer_array.c b/orte/class/orte_pointer_array.c index 06926fb1ac..275248333e 100644 --- a/orte/class/orte_pointer_array.c +++ b/orte/class/orte_pointer_array.c @@ -17,13 +17,13 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include #include #include #include -#include "orte/orte_constants.h" #include "orte/class/orte_pointer_array.h" #include "opal/util/output.h" diff --git a/orte/class/orte_pointer_array.h b/orte/class/orte_pointer_array.h index 5ad92c1813..e0e16aaa6d 100644 --- a/orte/class/orte_pointer_array.h +++ b/orte/class/orte_pointer_array.h @@ -25,7 +25,7 @@ #define ORTE_POINTER_ARRAY_H #include "orte_config.h" -#include "orte/orte_types.h" +#include "orte/types.h" #if HAVE_STRING_H #include @@ -34,9 +34,7 @@ #include "opal/threads/mutex.h" #include "opal/class/opal_object.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /** * dynamic pointer array @@ -225,7 +223,6 @@ static inline void orte_pointer_array_free_clear(orte_pointer_array_t *array) ORTE_DECLSPEC bool orte_pointer_array_test_and_set_item (orte_pointer_array_t *table, orte_std_cntr_t element_index, void *value); -#if 
defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS + #endif /* OMPI_POINTER_ARRAY_H */ diff --git a/orte/class/orte_proc_table.c b/orte/class/orte_proc_table.c index aa566ab984..c3bdd019af 100644 --- a/orte/class/orte_proc_table.c +++ b/orte/class/orte_proc_table.c @@ -17,15 +17,18 @@ */ #include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" #include #include -#include "orte/orte_constants.h" #include "opal/util/output.h" + +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "orte/class/orte_proc_table.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/ns/ns_types.h" /* * orte_process_name_hash_node_t @@ -45,6 +48,19 @@ static OBJ_CLASS_INSTANCE( NULL, NULL); +typedef struct +{ + opal_list_item_t super; + orte_process_name_t hn_key; + orte_process_name_t hn_value; +} orte_proc_hash_node_name_t; + +static OBJ_CLASS_INSTANCE( + orte_proc_hash_node_name_t, + opal_list_item_t, + NULL, + NULL); + #define GET_KEY(proc) \ ( (((uint32_t) proc->jobid) << 24) + ((uint32_t) proc->vpid) ) @@ -65,7 +81,7 @@ void* orte_hash_table_get_proc(opal_hash_table_t* ht, for(node = (orte_proc_hash_node_t*)opal_list_get_first(list); node != (orte_proc_hash_node_t*)opal_list_get_end(list); node = (orte_proc_hash_node_t*)opal_list_get_next(node)) { - if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { return node->hn_value; } } @@ -92,7 +108,7 @@ int orte_hash_table_set_proc( for(node = (orte_proc_hash_node_t*)opal_list_get_first(list); node != (orte_proc_hash_node_t*)opal_list_get_end(list); node = (orte_proc_hash_node_t*)opal_list_get_next(node)) { - if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { node->hn_value = value; return ORTE_SUCCESS; } @@ -130,7 +146,100 @@ int 
orte_hash_table_remove_proc( for(node = (orte_proc_hash_node_t*)opal_list_get_first(list); node != (orte_proc_hash_node_t*)opal_list_get_end(list); node = (orte_proc_hash_node_t*)opal_list_get_next(node)) { - if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { + opal_list_remove_item(list, (opal_list_item_t*)node); + opal_list_append(&ht->ht_nodes, (opal_list_item_t*)node); + ht->ht_size--; + return ORTE_SUCCESS; + } + } + return ORTE_ERR_NOT_FOUND; +} + +orte_process_name_t orte_hash_table_get_proc_name(opal_hash_table_t* ht, + const orte_process_name_t* proc, + orte_ns_cmp_bitmask_t mask) +{ + uint32_t key = GET_KEY(proc); + opal_list_t* list = ht->ht_table + (key & ht->ht_mask); + orte_proc_hash_node_name_t *node; + +#if OMPI_ENABLE_DEBUG + if(ht->ht_table_size == 0) { + opal_output(0, "opal_hash_table_get_proc_name:" + "opal_hash_table_init() has not been called"); + return *ORTE_NAME_INVALID; + } +#endif + for(node = (orte_proc_hash_node_name_t*)opal_list_get_first(list); + node != (orte_proc_hash_node_name_t*)opal_list_get_end(list); + node = (orte_proc_hash_node_name_t*)opal_list_get_next(node)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &node->hn_key, proc)) { + return node->hn_value; + } + } + return *ORTE_NAME_INVALID; +} + + +int orte_hash_table_set_proc_name(opal_hash_table_t* ht, + const orte_process_name_t* proc, + const orte_process_name_t *value, + orte_ns_cmp_bitmask_t mask) +{ + uint32_t key = GET_KEY(proc); + opal_list_t* list = ht->ht_table + (key & ht->ht_mask); + orte_proc_hash_node_name_t *node; + +#if OMPI_ENABLE_DEBUG + if(ht->ht_table_size == 0) { + opal_output(0, "opal_hash_table_set_proc_name:" + "opal_hash_table_init() has not been called"); + return ORTE_ERR_BAD_PARAM; + } +#endif + for(node = (orte_proc_hash_node_name_t*)opal_list_get_first(list); + node != (orte_proc_hash_node_name_t*)opal_list_get_end(list); 
+ node = (orte_proc_hash_node_name_t*)opal_list_get_next(node)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &node->hn_key, proc)) { + node->hn_value = *value; + return ORTE_SUCCESS; + } + } + + node = (orte_proc_hash_node_name_t*)opal_list_remove_first(&ht->ht_nodes); + if(NULL == node) { + node = OBJ_NEW(orte_proc_hash_node_name_t); + if(NULL == node) + return ORTE_ERR_OUT_OF_RESOURCE; + } + node->hn_key = *proc; + node->hn_value = *value; + opal_list_append(list, (opal_list_item_t*)node); + ht->ht_size++; + return ORTE_SUCCESS; +} + + +int orte_hash_table_remove_proc_name(opal_hash_table_t* ht, + const orte_process_name_t* proc, + orte_ns_cmp_bitmask_t mask) +{ + uint32_t key = GET_KEY(proc); + opal_list_t* list = ht->ht_table + (key & ht->ht_mask); + orte_proc_hash_node_name_t *node; + +#if OMPI_ENABLE_DEBUG + if(ht->ht_table_size == 0) { + opal_output(0, "opal_hash_table_remove_proc_name:" + "opal_hash_table_init() has not been called"); + return ORTE_ERR_BAD_PARAM; + } +#endif + for(node = (orte_proc_hash_node_name_t*)opal_list_get_first(list); + node != (orte_proc_hash_node_name_t*)opal_list_get_end(list); + node = (orte_proc_hash_node_name_t*)opal_list_get_next(node)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &node->hn_key, proc)) { opal_list_remove_item(list, (opal_list_item_t*)node); opal_list_append(&ht->ht_nodes, (opal_list_item_t*)node); ht->ht_size--; diff --git a/orte/class/orte_proc_table.h b/orte/class/orte_proc_table.h index 4e825984de..55e4996b15 100644 --- a/orte/class/orte_proc_table.h +++ b/orte/class/orte_proc_table.h @@ -25,13 +25,14 @@ #ifndef ORTE_PROC_TABLE_H #define ORTE_PROC_TABLE_H +#include "orte_config.h" +#include "orte/types.h" + #include "opal/class/opal_hash_table.h" -#include "orte/mca/ns/ns_types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +#include "orte/util/name_fns.h" +BEGIN_C_DECLS /** * Retrieve value via orte_process_name_t key. 
@@ -52,7 +53,7 @@ ORTE_DECLSPEC void *orte_hash_table_get_proc( * @param table The input hash table (IN). * @param key The input key (IN). * @param value The value to be associated with the key (IN). - * @return OMPI return code. + * @return ORTE return code. * */ @@ -66,7 +67,7 @@ ORTE_DECLSPEC int orte_hash_table_set_proc( * * @param table The input hash table (IN). * @param key The input key (IN). - * @return OMPI return code. + * @return ORTE return code. * */ @@ -75,9 +76,52 @@ ORTE_DECLSPEC int orte_hash_table_remove_proc( const orte_process_name_t* key); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +/** +* Retrieve process name via orte_process_name_t key. + * + * @param table The input hash table (IN). + * @param key The input key (IN). + * @return The process name associated with the key or + * ORTE_NAME_INVALID if the item is not found. + * + */ + +ORTE_DECLSPEC orte_process_name_t orte_hash_table_get_proc_name( + opal_hash_table_t* table, + const orte_process_name_t* key, + orte_ns_cmp_bitmask_t mask); + +/** +* Set process name based on uint32_t key. + * + * @param table The input hash table (IN). + * @param key The input key (IN). + * @param value The process name to be associated with the key (IN). + * @return ORTE return code. + * + */ + +ORTE_DECLSPEC int orte_hash_table_set_proc_name( + opal_hash_table_t* table, + const orte_process_name_t*, + const orte_process_name_t*, + orte_ns_cmp_bitmask_t mask); + +/** +* Remove process name based on uint32_t key. + * + * @param table The input hash table (IN). + * @param key The input key (IN). + * @return ORTE return code. 
+ * + */ + +ORTE_DECLSPEC int orte_hash_table_remove_proc_name( + opal_hash_table_t* table, + const orte_process_name_t* key, + orte_ns_cmp_bitmask_t mask); + +END_C_DECLS #endif /* OMPI_HASH_TABLE_H */ diff --git a/orte/class/orte_value_array.c b/orte/class/orte_value_array.c index df8531948b..4db3559c38 100644 --- a/orte/class/orte_value_array.c +++ b/orte/class/orte_value_array.c @@ -17,8 +17,8 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" #include "orte/class/orte_value_array.h" diff --git a/orte/class/orte_value_array.h b/orte/class/orte_value_array.h index cb800c9d8e..9edf51435a 100644 --- a/orte/class/orte_value_array.h +++ b/orte/class/orte_value_array.h @@ -19,10 +19,11 @@ #ifndef ORTE_VALUE_ARRAY_H #define ORTE_VALUE_ARRAY_H -#include #include "orte_config.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" +#include "orte/constants.h" +#include "orte/types.h" + +#include #include "opal/class/opal_object.h" #if OMPI_ENABLE_DEBUG @@ -35,9 +36,8 @@ * See ompi_bitmap.h for an explanation of why there is a split * between OMPI and ORTE for this generic class. */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif + +BEGIN_C_DECLS struct orte_value_array_t { @@ -275,9 +275,8 @@ static inline int orte_value_array_remove_item(orte_value_array_t *array, orte_s #define ORTE_VALUE_ARRAY_GET_BASE(array, item_type) \ ((item_type*) ((array)->array_items)) -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS + #endif diff --git a/orte/dss/dss_arith.c b/orte/dss/dss_arith.c deleted file mode 100644 index 741a2be738..0000000000 --- a/orte/dss/dss_arith.c +++ /dev/null @@ -1,856 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/odls/odls_types.h" - -#include "orte/dss/dss_internal.h" - -static void orte_dss_arith_int(int *value, int *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_uint(uint *value, uint *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_size(size_t *value, size_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_pid(pid_t *value, pid_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_byte(uint8_t *value, uint8_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_int8(int8_t *value, int8_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_int16(int16_t *value, int16_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_uint16(uint16_t *value, uint16_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_int32(int32_t *value, int32_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_uint32(uint32_t *value, uint32_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_int64(int64_t *value, int64_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_uint64(uint64_t *value, uint64_t *operand, orte_dss_arith_op_t operation); - -static void orte_dss_arith_std_cntr(orte_std_cntr_t *value, orte_std_cntr_t *operand, orte_dss_arith_op_t operation); - -/* some weird ones - but somebody *might* want to do it, I suppose... 
*/ -static void orte_dss_arith_data_type(orte_data_type_t *value, orte_data_type_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_daemon_cmd(orte_daemon_cmd_flag_t *value, orte_daemon_cmd_flag_t *operand, orte_dss_arith_op_t operation); - -#if OPAL_ENABLE_FT == 1 -static void orte_dss_arith_ckpt_cmd(orte_daemon_cmd_flag_t *value, orte_daemon_cmd_flag_t *operand, orte_dss_arith_op_t operation); -#endif - -int orte_dss_arith(orte_data_value_t *value, orte_data_value_t *operand, orte_dss_arith_op_t operation) -{ - /* check for error */ - if (NULL == value || NULL == operand) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (operand->type != value->type) { - ORTE_ERROR_LOG(ORTE_ERR_TYPE_MISMATCH); - return ORTE_ERR_TYPE_MISMATCH; - } - - /* Lookup the arith function for this type and call it */ - - switch(operand->type) { - case ORTE_INT: - orte_dss_arith_int((int*)value->data, (int*)operand->data, operation); - break; - - case ORTE_UINT: - orte_dss_arith_uint((uint*)value->data, (uint*)operand->data, operation); - break; - - case ORTE_SIZE: - orte_dss_arith_size((size_t*)value->data, (size_t*)operand->data, operation); - break; - - case ORTE_PID: - orte_dss_arith_pid((pid_t*)value->data, (pid_t*)operand->data, operation); - break; - - case ORTE_BYTE: - case ORTE_UINT8: - orte_dss_arith_byte((uint8_t*)value->data, (uint8_t*)operand->data, operation); - break; - - case ORTE_INT8: - orte_dss_arith_int8((int8_t*)value->data, (int8_t*)operand->data, operation); - break; - - case ORTE_INT16: - orte_dss_arith_int16((int16_t*)value->data, (int16_t*)operand->data, operation); - break; - - case ORTE_UINT16: - orte_dss_arith_uint16((uint16_t*)value->data, (uint16_t*)operand->data, operation); - break; - - case ORTE_INT32: - orte_dss_arith_int32((int32_t*)value->data, (int32_t*)operand->data, operation); - break; - - case ORTE_UINT32: - orte_dss_arith_uint32((uint32_t*)value->data, (uint32_t*)operand->data, operation); - break; 
- - case ORTE_INT64: - orte_dss_arith_int64((int64_t*)value->data, (int64_t*)operand->data, operation); - break; - - case ORTE_UINT64: - orte_dss_arith_uint64((uint64_t*)value->data, (uint64_t*)operand->data, operation); - break; - - case ORTE_STD_CNTR: - orte_dss_arith_std_cntr((orte_std_cntr_t*)value->data, (orte_std_cntr_t*)operand->data, operation); - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return ORTE_ERR_OPERATION_UNSUPPORTED; - } - - return ORTE_SUCCESS; -} - -int orte_dss_increment(orte_data_value_t *value) -{ - int one; - unsigned int uone; - size_t sone; - pid_t pone; - uint8_t u8one; - int8_t i8one; - uint16_t u16one; - int16_t i16one; - uint32_t u32one; - int32_t i32one; - uint64_t u64one; - int64_t i64one; - orte_daemon_cmd_flag_t daemoncmdone; - orte_data_type_t datatypeone; - orte_std_cntr_t stdcntrone; -#if OPAL_ENABLE_FT == 1 - orte_daemon_cmd_flag_t ckptcmdone; -#endif - - /* check for error */ - if (NULL == value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - /* Lookup the arith function for this type and call it */ - - switch(value->type) { - case ORTE_INT: - one = 1; - orte_dss_arith_int((int*)value->data, &one, ORTE_DSS_ADD); - break; - - case ORTE_UINT: - uone = 1; - orte_dss_arith_uint((uint*)value->data, &uone, ORTE_DSS_ADD); - break; - - case ORTE_SIZE: - sone = 1; - orte_dss_arith_size((size_t*)value->data, &sone, ORTE_DSS_ADD); - break; - - case ORTE_PID: - pone = 1; - orte_dss_arith_pid((pid_t*)value->data, &pone, ORTE_DSS_ADD); - break; - - case ORTE_BYTE: - case ORTE_UINT8: - u8one = 1; - orte_dss_arith_byte((uint8_t*)value->data, &u8one, ORTE_DSS_ADD); - break; - - case ORTE_INT8: - i8one = 1; - orte_dss_arith_int8((int8_t*)value->data, &i8one, ORTE_DSS_ADD); - break; - - case ORTE_INT16: - i16one = 1; - orte_dss_arith_int16((int16_t*)value->data, &i16one, ORTE_DSS_ADD); - break; - - case ORTE_UINT16: - u16one = 1; - orte_dss_arith_uint16((uint16_t*)value->data, &u16one, 
ORTE_DSS_ADD); - break; - - case ORTE_INT32: - i32one = 1; - orte_dss_arith_int32((int32_t*)value->data, &i32one, ORTE_DSS_ADD); - break; - - case ORTE_UINT32: - u32one = 1; - orte_dss_arith_uint32((uint32_t*)value->data, &u32one, ORTE_DSS_ADD); - break; - - case ORTE_INT64: - i64one = 1; - orte_dss_arith_int64((int64_t*)value->data, &i64one, ORTE_DSS_ADD); - break; - - case ORTE_UINT64: - u64one = 1; - orte_dss_arith_uint64((uint64_t*)value->data, &u64one, ORTE_DSS_ADD); - break; - - case ORTE_DAEMON_CMD: - daemoncmdone = 1; - orte_dss_arith_daemon_cmd((orte_daemon_cmd_flag_t*)value->data, &daemoncmdone, ORTE_DSS_ADD); - break; - -#if OPAL_ENABLE_FT == 1 - case ORTE_CKPT_CMD: - ckptcmdone = 1; - orte_dss_arith_ckpt_cmd(value->data, &ckptcmdone, ORTE_DSS_ADD); - break; -#endif - - case ORTE_DATA_TYPE: - datatypeone = 1; - orte_dss_arith_data_type((orte_data_type_t*)value->data, &datatypeone, ORTE_DSS_ADD); - break; - - case ORTE_STD_CNTR: - stdcntrone = 1; - orte_dss_arith_std_cntr((orte_std_cntr_t*)value->data, &stdcntrone, ORTE_DSS_ADD); - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return ORTE_ERR_OPERATION_UNSUPPORTED; - } - - return ORTE_SUCCESS; -} - -int orte_dss_decrement(orte_data_value_t *value) -{ - int one; - unsigned int uone; - size_t sone; - pid_t pone; - uint8_t u8one; - int8_t i8one; - uint16_t u16one; - int16_t i16one; - uint32_t u32one; - int32_t i32one; - uint64_t u64one; - int64_t i64one; - orte_daemon_cmd_flag_t daemoncmdone; - orte_data_type_t datatypeone; - orte_std_cntr_t stdcntrone; -#if OPAL_ENABLE_FT == 1 - orte_daemon_cmd_flag_t ckptcmdone; -#endif - - /* check for error */ - if (NULL == value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - /* Lookup the arith function for this type and call it */ - - switch(value->type) { - case ORTE_INT: - one = 1; - orte_dss_arith_int((int*)value->data, &one, ORTE_DSS_SUB); - break; - - case ORTE_UINT: - uone = 1; - 
orte_dss_arith_uint((uint*)value->data, &uone, ORTE_DSS_SUB); - break; - - case ORTE_SIZE: - sone = 1; - orte_dss_arith_size((size_t*)value->data, &sone, ORTE_DSS_SUB); - break; - - case ORTE_PID: - pone = 1; - orte_dss_arith_pid((pid_t*)value->data, &pone, ORTE_DSS_SUB); - break; - - case ORTE_BYTE: - case ORTE_UINT8: - u8one = 1; - orte_dss_arith_byte((uint8_t*)value->data, &u8one, ORTE_DSS_SUB); - break; - - case ORTE_INT8: - i8one = 1; - orte_dss_arith_int8((int8_t*)value->data, &i8one, ORTE_DSS_SUB); - break; - - case ORTE_INT16: - i16one = 1; - orte_dss_arith_int16((int16_t*)value->data, &i16one, ORTE_DSS_SUB); - break; - - case ORTE_UINT16: - u16one = 1; - orte_dss_arith_uint16((uint16_t*)value->data, &u16one, ORTE_DSS_SUB); - break; - - case ORTE_INT32: - i32one = 1; - orte_dss_arith_int32((int32_t*)value->data, &i32one, ORTE_DSS_SUB); - break; - - case ORTE_UINT32: - u32one = 1; - orte_dss_arith_uint32((uint32_t*)value->data, &u32one, ORTE_DSS_SUB); - break; - - case ORTE_INT64: - i64one = 1; - orte_dss_arith_int64((int64_t*)value->data, &i64one, ORTE_DSS_SUB); - break; - - case ORTE_UINT64: - u64one = 1; - orte_dss_arith_uint64((uint64_t*)value->data, &u64one, ORTE_DSS_SUB); - break; - - case ORTE_DAEMON_CMD: - daemoncmdone = 1; - orte_dss_arith_daemon_cmd((orte_daemon_cmd_flag_t*)value->data, &daemoncmdone, ORTE_DSS_SUB); - break; - -#if OPAL_ENABLE_FT == 1 - case ORTE_CKPT_CMD: - ckptcmdone = 1; - orte_dss_arith_ckpt_cmd(value->data, &ckptcmdone, ORTE_DSS_SUB); - break; -#endif - - case ORTE_DATA_TYPE: - datatypeone = 1; - orte_dss_arith_data_type((orte_data_type_t*)value->data, &datatypeone, ORTE_DSS_SUB); - break; - - case ORTE_STD_CNTR: - stdcntrone = 1; - orte_dss_arith_std_cntr((orte_std_cntr_t*)value->data, &stdcntrone, ORTE_DSS_SUB); - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return ORTE_ERR_OPERATION_UNSUPPORTED; - } - - return ORTE_SUCCESS; -} - -/* - * NUMERIC arith FUNCTIONS - */ -static void 
orte_dss_arith_int(int *value, int *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_uint(uint *value, uint *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_size(size_t *value, size_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_pid(pid_t *value, pid_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - 
ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_byte(uint8_t *value, uint8_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_int8(int8_t *value, int8_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_int16(int16_t *value, int16_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_uint16(uint16_t *value, uint16_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - 
ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_int32(int32_t *value, int32_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_uint32(uint32_t *value, uint32_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_int64(int64_t *value, int64_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_uint64(uint64_t *value, uint64_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - 
- case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_std_cntr(orte_std_cntr_t *value, orte_std_cntr_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_data_type(orte_data_type_t *value, orte_data_type_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_daemon_cmd(orte_daemon_cmd_flag_t *value, orte_daemon_cmd_flag_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -#if OPAL_ENABLE_FT == 1 -static void 
orte_dss_arith_ckpt_cmd(orte_daemon_cmd_flag_t *value, orte_daemon_cmd_flag_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - return; - - case ORTE_DSS_SUB: - (*value) -= *operand; - return; - - case ORTE_DSS_MUL: - (*value) *= *operand; - return; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - return; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - return; -} -#endif diff --git a/orte/dss/dss_compare.c b/orte/dss/dss_compare.c deleted file mode 100644 index 005a2c88c2..0000000000 --- a/orte/dss/dss_compare.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" -#include "opal/util/output.h" - -int orte_dss_compare(void *value1, void *value2, orte_data_type_t type) -{ - orte_dss_type_info_t *info; - - /* check for error */ - if (NULL == value1 || NULL == value2) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* Lookup the compare function for this type and call it */ - - if (!(type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - return info->odti_compare_fn(value1, value2, type); -} - -/* - * NUMERIC COMPARE FUNCTIONS - */ -int orte_dss_compare_int(int *value1, int *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint(unsigned int *value1, unsigned int *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_size(size_t *value1, size_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_pid(pid_t *value1, pid_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_byte(char *value1, char *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - 
return ORTE_EQUAL; -} - -int orte_dss_compare_char(char *value1, char *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_int8(int8_t *value1, int8_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint8(uint8_t *value1, uint8_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_int16(int16_t *value1, int16_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint16(uint16_t *value1, uint16_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_int32(int32_t *value1, int32_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint32(uint32_t *value1, uint32_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_int64(int64_t *value1, int64_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint64(uint64_t *value1, uint64_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return 
ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -/* - * NON-NUMERIC SYSTEM TYPES - */ - -/* NULL */ -int orte_dss_compare_null(char *value1, char *value2, orte_data_type_t type) -{ - return ORTE_EQUAL; -} - -/* BOOL */ -int orte_dss_compare_bool(bool *value1, bool *value2, orte_data_type_t type) -{ - if (*value1 && !(*value2)) return ORTE_VALUE1_GREATER; - - if (*value2 && !(*value1)) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; - -} - -/* STRING */ -int orte_dss_compare_string(char *value1, char *value2, orte_data_type_t type) -{ - if (0 < strcmp(value1, value2)) return ORTE_VALUE2_GREATER; - - if (0 > strcmp(value1, value2)) return ORTE_VALUE1_GREATER; - - return ORTE_EQUAL; -} - -/* COMPARE FUNCTIONS FOR GENERIC ORTE TYPES */ - -/* ORTE_STD_CNTR */ -int orte_dss_compare_std_cntr(orte_std_cntr_t *value1, orte_std_cntr_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -/* ORTE_DATA_TYPE */ -int orte_dss_compare_dt(orte_data_type_t *value1, orte_data_type_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -#if OPAL_ENABLE_FT == 1 -/* ORTE_CKPT_CMD */ -int orte_dss_compare_ckpt_cmd(size_t *value1, size_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} -#endif - -/* ORTE_DATA_VALUE */ -int orte_dss_compare_data_value(orte_data_value_t *value1, orte_data_value_t *value2, orte_data_type_t type) -{ - /* can't compare if the two types don't match */ - if (value1->type != value2->type) { - ORTE_ERROR_LOG(ORTE_ERR_TYPE_MISMATCH); - return ORTE_ERR_TYPE_MISMATCH; - } - - /* okay, go ahead and compare the values themselves */ - return orte_dss.compare(value1->data, value2->data, value1->type); -} - -/* 
ORTE_BYTE_OBJECT */ -int orte_dss_compare_byte_object(orte_byte_object_t *value1, orte_byte_object_t *value2, orte_data_type_t type) -{ - int checksum, diff; - orte_std_cntr_t i; - - /* compare the sizes first - bigger size object is "greater than" */ - if (value1->size > value2->size) return ORTE_VALUE1_GREATER; - - if (value2->size > value1->size) return ORTE_VALUE2_GREATER; - - /* get here if the two sizes are identical - now do a simple checksum-style - * calculation to determine "biggest" - */ - checksum = 0; - - for (i=0; i < value1->size; i++) { - /* protect against overflows */ - diff = value1->bytes[i] - value2->bytes[i]; - if (INT_MAX-abs(checksum)-abs(diff) < 0) { /* got an overflow condition */ - checksum = 0; - } - checksum += diff; - } - - if (0 > checksum) return ORTE_VALUE2_GREATER; /* sum of value2 bytes was greater */ - - if (0 < checksum) return ORTE_VALUE1_GREATER; /* of value1 bytes was greater */ - - return ORTE_EQUAL; /* sum of both value's bytes was identical */ -} diff --git a/orte/dss/dss_copy.c b/orte/dss/dss_copy.c deleted file mode 100644 index 340f82334f..0000000000 --- a/orte/dss/dss_copy.c +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" -#include "opal/util/output.h" - -int orte_dss_copy(void **dest, void *src, orte_data_type_t type) -{ - int rc; - orte_dss_type_info_t *info; - - /* check for error */ - if (NULL == dest) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (NULL == src && (ORTE_NULL != type && ORTE_STRING != type)) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* Lookup the copy function for this type and call it */ - - if (!(type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - if (ORTE_SUCCESS != (rc = info->odti_copy_fn(dest, src, type))) { - ORTE_ERROR_LOG(rc); - } - - - return rc; -} - -/* - * STANDARD COPY FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ -int orte_dss_std_copy(void **dest, void *src, orte_data_type_t type) -{ - size_t datasize; - uint8_t *val = NULL; - - switch(type) { - case ORTE_BOOL: - datasize = sizeof(bool); - break; - - case ORTE_INT: - case ORTE_UINT: - datasize = sizeof(int); - break; - - case ORTE_SIZE: - datasize = sizeof(size_t); - break; - - case ORTE_PID: - datasize = sizeof(pid_t); - break; - - case ORTE_BYTE: - case ORTE_INT8: - case ORTE_UINT8: - datasize = 1; - break; - - case ORTE_INT16: - case ORTE_UINT16: - datasize = 2; - break; - - case ORTE_INT32: - case ORTE_UINT32: - datasize = 4; - break; - - case ORTE_INT64: - case ORTE_UINT64: - datasize = 8; - break; - - case ORTE_STD_CNTR: - datasize = sizeof(orte_std_cntr_t); - break; - - case ORTE_DATA_TYPE: - datasize = sizeof(orte_data_type_t); - break; - -#if OPAL_ENABLE_FT == 1 - case ORTE_CKPT_CMD: - datasize = sizeof(size_t); - break; 
-#endif - - default: - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - val = (uint8_t*)malloc(datasize); - if (NULL == val) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - memcpy(val, src, datasize); - *dest = val; - - return ORTE_SUCCESS; -} - -/* COPY FUNCTIONS FOR NON-STANDARD SYSTEM TYPES */ - -/* - * NULL - */ -int orte_dss_copy_null(char **dest, char *src, orte_data_type_t type) -{ - char *val; - - *dest = (char*)malloc(sizeof(char*)); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - val = *dest; /* save the address of the value */ - - /* set the dest to null */ - *val = 0x00; - - return ORTE_SUCCESS; -} - -/* - * STRING - */ -int orte_dss_copy_string(char **dest, char *src, orte_data_type_t type) -{ - if (NULL == src) { /* got zero-length string/NULL pointer - store NULL */ - *dest = NULL; - } else { - *dest = strdup(src); - } - - return ORTE_SUCCESS; -} - -/* COPY FUNCTIONS FOR GENERIC ORTE TYPES */ - -/* - * ORTE_DATA_VALUE - */ -int orte_dss_copy_data_value(orte_data_value_t **dest, orte_data_value_t *src, - orte_data_type_t type) -{ - int rc; - - /* create the new object */ - *dest = OBJ_NEW(orte_data_value_t); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - (*dest)->type = src->type; - - /* copy the payload with its associated copy function */ - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((*dest)->data), src->data, src->type))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*dest); - return rc; - } - - return ORTE_SUCCESS; -} - - -/* - * ORTE_BYTE_OBJECT - */ -int orte_dss_copy_byte_object(orte_byte_object_t **dest, orte_byte_object_t *src, - orte_data_type_t type) -{ - /* allocate space for the new object */ - *dest = (orte_byte_object_t*)malloc(sizeof(orte_byte_object_t)); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return 
ORTE_ERR_OUT_OF_RESOURCE; - } - - (*dest)->size = src->size; - - /* allocate the required space for the bytes */ - (*dest)->bytes = (uint8_t*)malloc(src->size); - if (NULL == (*dest)->bytes) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(*dest); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* copy the data across */ - memcpy((*dest)->bytes, src->bytes, src->size); - - return ORTE_SUCCESS; -} diff --git a/orte/dss/dss_internal.h b/orte/dss/dss_internal.h deleted file mode 100644 index f218411d41..0000000000 --- a/orte/dss/dss_internal.h +++ /dev/null @@ -1,507 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef ORTE_DSS_INTERNAL_H_ -#define ORTE_DSS_INTERNAL_H_ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/class/orte_pointer_array.h" - -#include "orte/dss/dss.h" - -#if HAVE_STRING_H -# if !defined(STDC_HEADERS) && HAVE_MEMORY_H -# include -# endif -# include -#endif - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * The default starting chunk size - */ -#define ORTE_DSS_DEFAULT_INITIAL_SIZE 128 -/* - * The default threshold size when we switch from doubling the - * buffer size to addatively increasing it - */ -#define ORTE_DSS_DEFAULT_THRESHOLD_SIZE 1024 - -/* - * Internal type corresponding to size_t. Do not use this in - * interface calls - use ORTE_SIZE instead. 
- */ -#if SIZEOF_SIZE_T == 1 -#define DSS_TYPE_SIZE_T ORTE_UINT8 -#elif SIZEOF_SIZE_T == 2 -#define DSS_TYPE_SIZE_T ORTE_UINT16 -#elif SIZEOF_SIZE_T == 4 -#define DSS_TYPE_SIZE_T ORTE_UINT32 -#elif SIZEOF_SIZE_T == 8 -#define DSS_TYPE_SIZE_T ORTE_UINT64 -#else -#error Unsupported size_t size! -#endif - -/* - * Internal type corresponding to bool. Do not use this in interface - * calls - use ORTE_BOOL instead. - */ -#if SIZEOF_BOOL == 1 -#define DSS_TYPE_BOOL ORTE_UINT8 -#elif SIZEOF_BOOL == 2 -#define DSS_TYPE_BOOL ORTE_UINT16 -#elif SIZEOF_BOOL == 4 -#define DSS_TYPE_BOOL ORTE_UINT32 -#elif SIZEOF_BOOL == 8 -#define DSS_TYPE_BOOL ORTE_UINT64 -#else -#error Unsupported bool size! -#endif - -/* - * Internal type corresponding to int and unsigned int. Do not use - * this in interface calls - use ORTE_INT / ORTE_UINT instead. - */ -#if SIZEOF_INT == 1 -#define DSS_TYPE_INT ORTE_INT8 -#define DSS_TYPE_UINT ORTE_UINT8 -#elif SIZEOF_INT == 2 -#define DSS_TYPE_INT ORTE_INT16 -#define DSS_TYPE_UINT ORTE_UINT16 -#elif SIZEOF_INT == 4 -#define DSS_TYPE_INT ORTE_INT32 -#define DSS_TYPE_UINT ORTE_UINT32 -#elif SIZEOF_INT == 8 -#define DSS_TYPE_INT ORTE_INT64 -#define DSS_TYPE_UINT ORTE_UINT64 -#else -#error Unsupported int size! -#endif - -/* - * Internal type corresponding to pid_t. Do not use this in interface - * calls - use ORTE_PID instead. - */ -#if SIZEOF_PID_T == 1 -#define DSS_TYPE_PID_T ORTE_UINT8 -#elif SIZEOF_PID_T == 2 -#define DSS_TYPE_PID_T ORTE_UINT16 -#elif SIZEOF_PID_T == 4 -#define DSS_TYPE_PID_T ORTE_UINT32 -#elif SIZEOF_PID_T == 8 -#define DSS_TYPE_PID_T ORTE_UINT64 -#else -#error Unsupported pid_t size! 
-#endif - -/* Unpack generic size macros */ -#define UNPACK_SIZE_MISMATCH(unpack_type, remote_type, ret) \ -do { \ - switch(remote_type) { \ - case ORTE_UINT8: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint8_t, remote_type); \ - break; \ - case ORTE_INT8: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int8_t, remote_type); \ - break; \ - case ORTE_UINT16: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint16_t, remote_type); \ - break; \ - case ORTE_INT16: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int16_t, remote_type); \ - break; \ - case ORTE_UINT32: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint32_t, remote_type); \ - break; \ - case ORTE_INT32: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int32_t, remote_type); \ - break; \ - case ORTE_UINT64: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint64_t, remote_type); \ - break; \ - case ORTE_INT64: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int64_t, remote_type); \ - break; \ - default: \ - ret = ORTE_ERR_NOT_FOUND; \ - ORTE_ERROR_LOG(ret); \ - } \ -} while (0) - -/* NOTE: do not need to deal with endianness here, as the unpacking of -the underling sender-side type will do that for us. Repeat: the -data in tmpbuf[] is already in host byte order. 
*/ -#define UNPACK_SIZE_MISMATCH_FOUND(unpack_type, tmptype, tmpdsstype) \ -do { \ - orte_std_cntr_t i; \ - tmptype *tmpbuf = (tmptype*)malloc(sizeof(tmptype) * (*num_vals)); \ - ret = orte_dss_unpack_buffer(buffer, tmpbuf, num_vals, tmpdsstype); \ - for (i = 0 ; i < *num_vals ; ++i) { \ - ((unpack_type*) dest)[i] = (unpack_type)(tmpbuf[i]); \ - } \ - free(tmpbuf); \ -} while (0) - - -/** - * Internal struct used for holding registered dss functions - */ -struct orte_dss_type_info_t { - opal_object_t super; - /* type identifier */ - orte_data_type_t odti_type; - /** Debugging string name */ - char *odti_name; - /** Pack function */ - orte_dss_pack_fn_t odti_pack_fn; - /** Unpack function */ - orte_dss_unpack_fn_t odti_unpack_fn; - /** copy function */ - orte_dss_copy_fn_t odti_copy_fn; - /** compare function */ - orte_dss_compare_fn_t odti_compare_fn; - /** size function */ - orte_dss_size_fn_t odti_size_fn; - /** print function */ - orte_dss_print_fn_t odti_print_fn; - /** Release function */ - orte_dss_release_fn_t odti_release_fn; - /** flag to indicate structured data */ - bool odti_structured; -}; -/** - * Convenience typedef - */ -typedef struct orte_dss_type_info_t orte_dss_type_info_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_dss_type_info_t); - -/* - * globals needed within dss - */ -extern bool orte_dss_initialized; -extern bool orte_dss_debug; -extern int orte_dss_verbose; -extern int orte_dss_initial_size; -extern int orte_dss_threshold_size; -extern orte_pointer_array_t *orte_dss_types; -extern orte_data_type_t orte_dss_num_reg_types; - - /* - * Implementations of API functions - */ - - int orte_dss_set(orte_data_value_t *value, void *new_value, orte_data_type_t type); - - int orte_dss_get(void **data, orte_data_value_t *value, orte_data_type_t type); - - int orte_dss_arith(orte_data_value_t *value, orte_data_value_t *operand, orte_dss_arith_op_t operation); - - int orte_dss_increment(orte_data_value_t *value); - - int 
orte_dss_decrement(orte_data_value_t *value); - - int orte_dss_set_buffer_type(orte_buffer_t *buffer, orte_dss_buffer_type_t type); - - int orte_dss_pack(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, - orte_data_type_t type); - int orte_dss_unpack(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *max_num_vals, - orte_data_type_t type); - - int orte_dss_copy(void **dest, void *src, orte_data_type_t type); - - int orte_dss_compare(void *value1, void *value2, - orte_data_type_t type); - - int orte_dss_print(char **output, char *prefix, void *src, orte_data_type_t type); - - int orte_dss_dump(int output_stream, void *src, orte_data_type_t type); - - int orte_dss_size(size_t *size, void *src, orte_data_type_t type); - - int orte_dss_peek(orte_buffer_t *buffer, orte_data_type_t *type, - orte_std_cntr_t *number); - - int orte_dss_peek_type(orte_buffer_t *buffer, orte_data_type_t *type); - - int orte_dss_unload(orte_buffer_t *buffer, void **payload, - orte_std_cntr_t *bytes_used); - int orte_dss_load(orte_buffer_t *buffer, void *payload, orte_std_cntr_t bytes_used); - - int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src); - - int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src); - - int orte_dss_register(orte_dss_pack_fn_t pack_fn, - orte_dss_unpack_fn_t unpack_fn, - orte_dss_copy_fn_t copy_fn, - orte_dss_compare_fn_t compare_fn, - orte_dss_size_fn_t size_fn, - orte_dss_print_fn_t print_fn, - orte_dss_release_fn_t release_fn, - bool structured, - const char *name, orte_data_type_t *type); - - void orte_dss_release(orte_data_value_t *value); - - char *orte_dss_lookup_data_type(orte_data_type_t type); - - void orte_dss_dump_data_types(int output); - - /* - * Non-API functions - */ - int orte_dss_pack_buffer(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - orte_data_type_t type); - - int orte_dss_unpack_buffer(orte_buffer_t *buffer, void *dst, orte_std_cntr_t *num_vals, - orte_data_type_t type); - - /* 
- * Internal pack functions - */ - - int orte_dss_pack_null(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_byte(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_bool(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_int(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_int16(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_int32(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_int64(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_sizet(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_pid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_string(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_std_cntr(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num, - orte_data_type_t type); - - int orte_dss_pack_data_type(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -#if OPAL_ENABLE_FT == 1 - int orte_dss_pack_ckpt_cmd(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); -#endif - - int orte_dss_pack_data_value(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_byte_object(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - /* - * Internal unpack functions - */ - - int orte_dss_unpack_null(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t 
*num_vals, orte_data_type_t type); - int orte_dss_unpack_byte(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_bool(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_int(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - int orte_dss_unpack_int16(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - int orte_dss_unpack_int32(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - int orte_dss_unpack_int64(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_sizet(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_pid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_string(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_std_cntr(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num, - orte_data_type_t type); - - int orte_dss_unpack_data_type(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -#if OPAL_ENABLE_FT == 1 - int orte_dss_unpack_ckpt_cmd(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); -#endif - - int orte_dss_unpack_data_value(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_byte_object(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - /* - * Internal copy functions - */ - - int orte_dss_std_copy(void **dest, void *src, orte_data_type_t type); - - int orte_dss_copy_null(char **dest, char *src, orte_data_type_t type); - - int orte_dss_copy_string(char **dest, char *src, orte_data_type_t type); - - int 
orte_dss_copy_byte_object(orte_byte_object_t **dest, orte_byte_object_t *src, - orte_data_type_t type); - - int orte_dss_copy_data_value(orte_data_value_t **dest, orte_data_value_t *src, - orte_data_type_t type); - /* - * Internal compare functions - */ - - int orte_dss_compare_bool(bool *value1, bool *value2, orte_data_type_t type); - - int orte_dss_compare_int(int *value1, int *value2, orte_data_type_t type); - int orte_dss_compare_uint(unsigned int *value1, unsigned int *value2, orte_data_type_t type); - - int orte_dss_compare_size(size_t *value1, size_t *value2, orte_data_type_t type); - - int orte_dss_compare_pid(pid_t *value1, pid_t *value2, orte_data_type_t type); - - int orte_dss_compare_byte(char *value1, char *value2, orte_data_type_t type); - int orte_dss_compare_char(char *value1, char *value2, orte_data_type_t type); - int orte_dss_compare_int8(int8_t *value1, int8_t *value2, orte_data_type_t type); - int orte_dss_compare_uint8(uint8_t *value1, uint8_t *value2, orte_data_type_t type); - - int orte_dss_compare_int16(int16_t *value1, int16_t *value2, orte_data_type_t type); - int orte_dss_compare_uint16(uint16_t *value1, uint16_t *value2, orte_data_type_t type); - - int orte_dss_compare_int32(int32_t *value1, int32_t *value2, orte_data_type_t type); - int orte_dss_compare_uint32(uint32_t *value1, uint32_t *value2, orte_data_type_t type); - - int orte_dss_compare_int64(int64_t *value1, int64_t *value2, orte_data_type_t type); - int orte_dss_compare_uint64(uint64_t *value1, uint64_t *value2, orte_data_type_t type); - - int orte_dss_compare_null(char *value1, char *value2, orte_data_type_t type); - - int orte_dss_compare_string(char *value1, char *value2, orte_data_type_t type); - - int orte_dss_compare_std_cntr(orte_std_cntr_t *value1, orte_std_cntr_t *value2, orte_data_type_t type); - - int orte_dss_compare_dt(orte_data_type_t *value1, orte_data_type_t *value2, orte_data_type_t type); - -#if OPAL_ENABLE_FT == 1 - int orte_dss_compare_ckpt_cmd(size_t 
*value1, size_t *value2, orte_data_type_t type); -#endif - - int orte_dss_compare_data_value(orte_data_value_t *value1, orte_data_value_t *value2, orte_data_type_t type); - - int orte_dss_compare_byte_object(orte_byte_object_t *value1, orte_byte_object_t *value2, orte_data_type_t type); - - /* - * Internal size functions - */ - int orte_dss_std_size(size_t *size, void *src, orte_data_type_t type); - - int orte_dss_size_string(size_t *size, char *src, orte_data_type_t type); - - int orte_dss_size_data_value(size_t *size, orte_data_value_t *src, orte_data_type_t type); - - int orte_dss_size_byte_object(size_t *size, orte_byte_object_t *src, orte_data_type_t type); - - /* - * Internal print functions - */ - int orte_dss_print_byte(char **output, char *prefix, uint8_t *src, orte_data_type_t type); - - int orte_dss_print_string(char **output, char *prefix, char *src, orte_data_type_t type); - - int orte_dss_print_size(char **output, char *prefix, size_t *src, orte_data_type_t type); - int orte_dss_print_pid(char **output, char *prefix, pid_t *src, orte_data_type_t type); - int orte_dss_print_bool(char **output, char *prefix, bool *src, orte_data_type_t type); - int orte_dss_print_int(char **output, char *prefix, int *src, orte_data_type_t type); - int orte_dss_print_uint(char **output, char *prefix, int *src, orte_data_type_t type); - int orte_dss_print_uint8(char **output, char *prefix, uint8_t *src, orte_data_type_t type); - int orte_dss_print_uint16(char **output, char *prefix, uint16_t *src, orte_data_type_t type); - int orte_dss_print_uint32(char **output, char *prefix, uint32_t *src, orte_data_type_t type); - int orte_dss_print_int8(char **output, char *prefix, int8_t *src, orte_data_type_t type); - int orte_dss_print_int16(char **output, char *prefix, int16_t *src, orte_data_type_t type); - int orte_dss_print_int32(char **output, char *prefix, int32_t *src, orte_data_type_t type); -#ifdef HAVE_INT64_T - int orte_dss_print_uint64(char **output, char *prefix, 
uint64_t *src, orte_data_type_t type); - int orte_dss_print_int64(char **output, char *prefix, int64_t *src, orte_data_type_t type); -#else - int orte_dss_print_uint64(char **output, char *prefix, void *src, orte_data_type_t type); - int orte_dss_print_int64(char **output, char *prefix, void *src, orte_data_type_t type); -#endif - int orte_dss_print_null(char **output, char *prefix, void *src, orte_data_type_t type); - int orte_dss_print_std_cntr(char **output, char *prefix, orte_std_cntr_t *src, orte_data_type_t type); - int orte_dss_print_data_type(char **output, char *prefix, orte_data_type_t *src, orte_data_type_t type); -#if OPAL_ENABLE_FT == 1 - int orte_dss_print_ckpt_cmd(char **output, char *prefix, size_t *src, orte_data_type_t type); -#endif - int orte_dss_print_data_value(char **output, char *prefix, orte_data_value_t *src, orte_data_type_t type); - int orte_dss_print_byte_object(char **output, char *prefix, orte_byte_object_t *src, orte_data_type_t type); - - - /* - * Internal release functions - */ - void orte_dss_std_release(orte_data_value_t *value); - - void orte_dss_std_obj_release(orte_data_value_t *value); - - void orte_dss_release_byte_object(orte_data_value_t *value); - - /* - * Internal helper functions - */ - - char* orte_dss_buffer_extend(orte_buffer_t *bptr, size_t bytes_to_add); - - bool orte_dss_too_small(orte_buffer_t *buffer, size_t bytes_reqd); - - orte_dss_type_info_t* orte_dss_find_type(orte_data_type_t type); - - int orte_dss_store_data_type(orte_buffer_t *buffer, orte_data_type_t type); - - int orte_dss_get_data_type(orte_buffer_t *buffer, orte_data_type_t *type); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/dss/dss_open_close.c b/orte/dss/dss_open_close.c deleted file mode 100644 index eb785efa08..0000000000 --- a/orte/dss/dss_open_close.c +++ /dev/null @@ -1,503 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - 
* Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "opal/mca/base/mca_base_param.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" - -#include "orte/dss/dss_internal.h" - -/** - * globals - */ -bool orte_dss_initialized = false; -bool orte_dss_debug = false; -int orte_dss_verbose = -1; /* by default disabled */ -int orte_dss_initial_size; -int orte_dss_threshold_size; -orte_pointer_array_t *orte_dss_types; -orte_data_type_t orte_dss_num_reg_types; -orte_dss_buffer_type_t default_buf_type; - -orte_dss_t orte_dss = { - orte_dss_set, - orte_dss_get, - orte_dss_arith, - orte_dss_increment, - orte_dss_decrement, - orte_dss_set_buffer_type, - orte_dss_pack, - orte_dss_unpack, - orte_dss_copy, - orte_dss_compare, - orte_dss_size, - orte_dss_print, - orte_dss_release, - orte_dss_peek, - orte_dss_unload, - orte_dss_load, - orte_dss_xfer_payload, - orte_dss_copy_payload, - orte_dss_register, - orte_dss_lookup_data_type, - orte_dss_dump_data_types, - orte_dss_dump -}; - -/** - * Object constructors, destructors, and instantiations - */ -/** Data Value **/ -/* constructor - used to initialize state of data value instance */ -static void orte_data_value_construct(orte_data_value_t* ptr) -{ - ptr->type = ORTE_UNDEF; - ptr->data = NULL; -} -/* destructor - used to release data value instance */ -static void orte_data_value_destruct(orte_data_value_t* ptr) -{ - if (NULL != ptr->data) { - orte_dss.release(ptr); - } -} - -/* define instance of opal_class_t */ 
-OBJ_CLASS_INSTANCE( - orte_data_value_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_data_value_construct, /* constructor */ - orte_data_value_destruct); /* destructor */ - - -static void orte_buffer_construct (orte_buffer_t* buffer) -{ - /** set the default buffer type */ - buffer->type = default_buf_type; - - /* Make everything NULL to begin with */ - - buffer->base_ptr = buffer->pack_ptr = buffer->unpack_ptr = NULL; - buffer->bytes_allocated = buffer->bytes_used = 0; -} - -static void orte_buffer_destruct (orte_buffer_t* buffer) -{ - if (NULL != buffer) { - if (NULL != buffer->base_ptr) { - free (buffer->base_ptr); - } - } -} - -OBJ_CLASS_INSTANCE(orte_buffer_t, - opal_object_t, - orte_buffer_construct, - orte_buffer_destruct); - - -static void orte_dss_type_info_construct(orte_dss_type_info_t *obj) -{ - obj->odti_name = NULL; - obj->odti_pack_fn = NULL; - obj->odti_unpack_fn = NULL; - obj->odti_copy_fn = NULL; - obj->odti_compare_fn = NULL; - obj->odti_size_fn = NULL; - obj->odti_print_fn = NULL; - obj->odti_release_fn = NULL; - obj->odti_structured = false; -} - -static void orte_dss_type_info_destruct(orte_dss_type_info_t *obj) -{ - if (NULL != obj->odti_name) { - free(obj->odti_name); - } -} - -OBJ_CLASS_INSTANCE(orte_dss_type_info_t, opal_object_t, - orte_dss_type_info_construct, - orte_dss_type_info_destruct); - - -int orte_dss_open(void) -{ - char *enviro_val; - int id, rc; - orte_data_type_t tmp; - int def_type; - - if (orte_dss_initialized) { - return ORTE_SUCCESS; - } - - enviro_val = getenv("ORTE_dss_debug"); - if (NULL != enviro_val) { /* debug requested */ - orte_dss_debug = true; - } else { - orte_dss_debug = false; - } - - /** set the default buffer type. If we are in debug mode, then we default - * to fully described buffers. 
Otherwise, we default to non-described for brevity - * and performance - */ -#if OMPI_ENABLE_DEBUG - def_type = ORTE_DSS_BUFFER_FULLY_DESC; -#else - def_type = ORTE_DSS_BUFFER_NON_DESC; -#endif - - id = mca_base_param_register_int("dss", "buffer", "type", - "Set the default mode for OpenRTE buffers (0=non-described, 1=described)", - def_type); - mca_base_param_lookup_int(id, &rc); - default_buf_type = rc; - - /* setup the initial size of the buffer. */ - id = mca_base_param_register_int("dss", "buffer_initial", "size", NULL, - ORTE_DSS_DEFAULT_INITIAL_SIZE); - mca_base_param_lookup_int(id, &orte_dss_initial_size); - - /* the threshold as to where to stop doubling the size of the buffer - * allocated memory and start doing additive increases */ - id = mca_base_param_register_int("dss", "buffer_threshold", "size", NULL, - ORTE_DSS_DEFAULT_THRESHOLD_SIZE); - mca_base_param_lookup_int(id, &orte_dss_threshold_size); - - /* Setup the types array */ - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&orte_dss_types, - ORTE_DSS_ID_DYNAMIC, - ORTE_DSS_ID_MAX, - ORTE_DSS_ID_MAX))) { - ORTE_ERROR_LOG(rc); - return rc; - } - orte_dss_num_reg_types = 0; - - /* Register all the intrinsic types */ - - tmp = ORTE_NULL; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_null, - orte_dss_unpack_null, - (orte_dss_copy_fn_t)orte_dss_copy_null, - (orte_dss_compare_fn_t)orte_dss_compare_null, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_null, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_NULL", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_BYTE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_byte, - orte_dss_unpack_byte, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_byte, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_byte, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - 
"ORTE_BYTE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_BOOL; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_bool, - orte_dss_unpack_bool, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_bool, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_bool, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_BOOL", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int, - orte_dss_unpack_int, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int, - orte_dss_unpack_int, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_uint, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT8; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_byte, - orte_dss_unpack_byte, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int8, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int8, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT8", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT8; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_byte, - orte_dss_unpack_byte, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint8, - (orte_dss_size_fn_t)orte_dss_std_size, - 
(orte_dss_print_fn_t)orte_dss_print_uint8, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT8", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT16; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int16, - orte_dss_unpack_int16, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int16, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int16, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT16", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT16; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int16, - orte_dss_unpack_int16, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint16, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_uint16, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT16", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT32; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int32, - orte_dss_unpack_int32, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int32, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int32, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT32", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT32; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int32, - orte_dss_unpack_int32, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint32, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_uint32, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT32", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT64; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int64, 
- orte_dss_unpack_int64, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int64, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int64, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT64", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT64; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int64, - orte_dss_unpack_int64, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint64, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_uint64, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT64", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_SIZE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_sizet, - orte_dss_unpack_sizet, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_size, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_size, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_SIZE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_PID; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_pid, - orte_dss_unpack_pid, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_pid, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_pid, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_PID", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_STRING; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_string, - orte_dss_unpack_string, - (orte_dss_copy_fn_t)orte_dss_copy_string, - (orte_dss_compare_fn_t)orte_dss_compare_string, - (orte_dss_size_fn_t)orte_dss_size_string, - (orte_dss_print_fn_t)orte_dss_print_string, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_STRUCTURED, - 
"ORTE_STRING", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_STD_CNTR; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_std_cntr, - orte_dss_unpack_std_cntr, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_std_cntr, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_std_cntr, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_STD_CNTR", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_DATA_TYPE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_data_type, - orte_dss_unpack_data_type, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_dt, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_data_type, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_DATA_TYPE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_DATA_VALUE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_data_value, - orte_dss_unpack_data_value, - (orte_dss_copy_fn_t)orte_dss_copy_data_value, - (orte_dss_compare_fn_t)orte_dss_compare_data_value, - (orte_dss_size_fn_t)orte_dss_size_data_value, - (orte_dss_print_fn_t)orte_dss_print_data_value, - (orte_dss_release_fn_t)orte_dss_std_obj_release, - ORTE_DSS_STRUCTURED, - "ORTE_DATA_VALUE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - -#if OPAL_ENABLE_FT == 1 - tmp = ORTE_CKPT_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_ckpt_cmd, - orte_dss_unpack_ckpt_cmd, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_ckpt_cmd, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_ckpt_cmd, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_DATA_TYPE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } -#endif - - tmp = ORTE_BYTE_OBJECT; - if (ORTE_SUCCESS != (rc = 
orte_dss.register_type(orte_dss_pack_byte_object, - orte_dss_unpack_byte_object, - (orte_dss_copy_fn_t)orte_dss_copy_byte_object, - (orte_dss_compare_fn_t)orte_dss_compare_byte_object, - (orte_dss_size_fn_t)orte_dss_size_byte_object, - (orte_dss_print_fn_t)orte_dss_print_byte_object, - (orte_dss_release_fn_t)orte_dss_release_byte_object, - ORTE_DSS_STRUCTURED, - "ORTE_BYTE_OBJECT", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* All done */ - - return ORTE_SUCCESS; -} - - -int orte_dss_close(void) -{ - orte_std_cntr_t i; - - orte_dss_initialized = false; - - for (i = 0 ; i < orte_pointer_array_get_size(orte_dss_types) ; ++i) { - orte_dss_type_info_t *info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, i); - if (NULL != info) { - OBJ_RELEASE(info); - } - } - - OBJ_RELEASE(orte_dss_types); - - return ORTE_SUCCESS; -} diff --git a/orte/dss/dss_pack.c b/orte/dss/dss_pack.c deleted file mode 100644 index bec18b7067..0000000000 --- a/orte/dss/dss_pack.c +++ /dev/null @@ -1,547 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "opal/types.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" - -int orte_dss_pack(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - orte_data_type_t type) -{ - int rc; - - /* check for error */ - if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* Pack the number of values */ - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - if (ORTE_SUCCESS != (rc = orte_dss_store_data_type(buffer, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - if (ORTE_SUCCESS != (rc = orte_dss_pack_std_cntr(buffer, &num_vals, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* Pack the value(s) */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, src, num_vals, type))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -int orte_dss_pack_buffer(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - orte_data_type_t type) -{ - int rc; - orte_dss_type_info_t *info; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_buffer( %p, %p, %lu, %d )\n", - (void*)buffer, src, (long unsigned int)num_vals, (int)type ) ); - - /* Pack the declared data type */ - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - if (ORTE_SUCCESS != (rc = orte_dss_store_data_type(buffer, type))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* Lookup the pack function for this type and call it */ - - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; - } - - if (ORTE_SUCCESS != (rc = info->odti_pack_fn(buffer, src, num_vals, type))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - - -/* PACK FUNCTIONS FOR GENERIC SYSTEM TYPES */ - -/* - * BOOL - */ -int 
orte_dss_pack_bool(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* System types need to always be described so we can properly - unpack them. If we aren't fully described, then add the - description for this type... */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, DSS_TYPE_BOOL))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_BOOL))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * INT - */ -int orte_dss_pack_int(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* System types need to always be described so we can properly - unpack them. If we aren't fully described, then add the - description for this type... */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, DSS_TYPE_INT))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_INT))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * SIZE_T - */ -int orte_dss_pack_sizet(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* System types need to always be described so we can properly - unpack them. If we aren't fully described, then add the - description for this type... 
*/ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, DSS_TYPE_SIZE_T))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_SIZE_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * PID_T - */ -int orte_dss_pack_pid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* System types need to always be described so we can properly - unpack them. If we aren't fully described, then add the - description for this type... */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, DSS_TYPE_PID_T))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_PID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - - -/* PACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ - -/* - * NULL - */ -int orte_dss_pack_null(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - char null=0x00; - char *dst; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_null * %d\n", num_vals ) ); - /* check to see if buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, num_vals))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* store the nulls */ - memset(dst, (int)null, num_vals); - - /* update buffer pointers */ - buffer->pack_ptr += num_vals; - buffer->bytes_used += num_vals; - - return ORTE_SUCCESS; -} - -/* - * BYTE, CHAR, INT8 - */ -int orte_dss_pack_byte(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - char *dst; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_byte * %d\n", num_vals ) ); - /* check to see if 
buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, num_vals))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* store the data */ - memcpy(dst, src, num_vals); - - /* update buffer pointers */ - buffer->pack_ptr += num_vals; - buffer->bytes_used += num_vals; - - return ORTE_SUCCESS; -} - -/* - * INT16 - */ -int orte_dss_pack_int16(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint16_t tmp, *srctmp = (uint16_t*) src; - char *dst; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_int16 * %d\n", num_vals ) ); - /* check to see if buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, num_vals*sizeof(tmp)))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < num_vals; ++i) { - tmp = htons(srctmp[i]); - memcpy(dst, &tmp, sizeof(tmp)); - dst += sizeof(tmp); - } - buffer->pack_ptr += num_vals * sizeof(tmp); - buffer->bytes_used += num_vals * sizeof(tmp); - - return ORTE_SUCCESS; -} - -/* - * INT32 - */ -int orte_dss_pack_int32(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint32_t tmp, *srctmp = (uint32_t*) src; - char *dst; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_int32 * %d\n", num_vals ) ); - /* check to see if buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, num_vals*sizeof(tmp)))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < num_vals; ++i) { - tmp = htonl(srctmp[i]); - memcpy(dst, &tmp, sizeof(tmp)); - dst += sizeof(tmp); - } - buffer->pack_ptr += num_vals * sizeof(tmp); - buffer->bytes_used += num_vals * sizeof(tmp); - - return ORTE_SUCCESS; -} - -/* - * INT64 - */ -int orte_dss_pack_int64(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ 
- orte_std_cntr_t i; - uint64_t tmp, *srctmp = (uint64_t*) src; - char *dst; - size_t bytes_packed = num_vals * sizeof(tmp); - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_int64 * %d\n", num_vals ) ); - /* check to see if buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, bytes_packed))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < num_vals; ++i) { - tmp = hton64(srctmp[i]); - memcpy(dst, &tmp, sizeof(tmp)); - dst += sizeof(tmp); - } - buffer->pack_ptr += bytes_packed; - buffer->bytes_used += bytes_packed; - - return ORTE_SUCCESS; -} - -/* - * STRING - */ -int orte_dss_pack_string(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret = ORTE_SUCCESS; - orte_std_cntr_t i, len; - char **ssrc = (char**) src; - - for (i = 0; i < num_vals; ++i) { - if (NULL == ssrc[i]) { /* got zero-length string/NULL pointer - store NULL */ - len = 0; - if (ORTE_SUCCESS != (ret = orte_dss_pack_std_cntr(buffer, &len, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - len = (orte_std_cntr_t)strlen(ssrc[i]) + 1; - if (ORTE_SUCCESS != (ret = orte_dss_pack_std_cntr(buffer, &len, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - return ret; - } - if (ORTE_SUCCESS != (ret = - orte_dss_pack_byte(buffer, ssrc[i], len, ORTE_BYTE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - } - - return ORTE_SUCCESS; -} - -/* PACK FUNCTIONS FOR GENERIC ORTE TYPES */ - -/* - * ORTE_STD_CNTR - */ -int orte_dss_pack_std_cntr(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_STD_CNTR_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * ORTE_DATA_TYPE - */ -int orte_dss_pack_data_type(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - 
orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_DATA_TYPE_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * ORTE_DATA_VALUE - */ -int orte_dss_pack_data_value(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num, orte_data_type_t type) -{ - orte_dss_type_info_t *info; - orte_data_value_t **sdv; - orte_std_cntr_t i; - int ret; - - sdv = (orte_data_value_t **) src; - - for (i = 0; i < num; ++i) { - /* if the src data value is NULL, then we will pack it as ORTE_NULL to indicate - * that the unpack should leave a NULL data value - */ - if (NULL == sdv[i]) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, ORTE_NULL))) { - ORTE_ERROR_LOG(ret); - return ret; - } - continue; - } - - /* pack the data type - we'll need it on the other end */ - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, sdv[i]->type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* if the data type is UNDEF, then nothing more to do */ - if (ORTE_UNDEF == sdv[i]->type) continue; - - /* Lookup the pack function for this type and call it */ - - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, sdv[i]->type))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; - } - - if (info->odti_structured) { - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, &(sdv[i]->data), 1, sdv[i]->type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, sdv[i]->data, 1, sdv[i]->type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - } - - return ORTE_SUCCESS; -} - -#if OPAL_ENABLE_FT == 1 -/* - * ORTE_CKPT_CMD - */ -int orte_dss_pack_ckpt_cmd(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num, - orte_data_type_t type) -{ - size_t required; - int rc; - - required = sizeof(size_t); - switch (required) { - - case 1: - if (ORTE_SUCCESS != 
( - rc = orte_dss_pack_byte(buffer, src, num, ORTE_BYTE))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 2: - if (ORTE_SUCCESS != ( - rc = orte_dss_pack_int16(buffer, src, num, ORTE_INT16))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 4: - if (ORTE_SUCCESS != ( - rc = orte_dss_pack_int32(buffer, src, num, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 8: - if (ORTE_SUCCESS != ( - rc = orte_dss_pack_int64(buffer, src, num, ORTE_INT64))) { - ORTE_ERROR_LOG(rc); - } - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - return rc; -} -#endif - -/* - * ORTE_BYTE_OBJECT - */ -int orte_dss_pack_byte_object(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num, - orte_data_type_t type) -{ - orte_byte_object_t **sbyteptr; - orte_std_cntr_t i, n; - int ret; - - sbyteptr = (orte_byte_object_t **) src; - - for (i = 0; i < num; ++i) { - n = sbyteptr[i]->size; - if (ORTE_SUCCESS != (ret = orte_dss_pack_std_cntr(buffer, &n, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - return ret; - } - if (0 < n) { - if (ORTE_SUCCESS != (ret = - orte_dss_pack_byte(buffer, sbyteptr[i]->bytes, n, ORTE_BYTE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/dss/dss_print.c b/orte/dss/dss_print.c deleted file mode 100644 index 5ce88a6dc3..0000000000 --- a/orte/dss/dss_print.c +++ /dev/null @@ -1,522 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" - -int orte_dss_print(char **output, char *prefix, void *src, orte_data_type_t type) -{ - int rc; - orte_dss_type_info_t *info; - - /* check for error */ - if (NULL == output) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* Lookup the print function for this type and call it */ - - if (!(type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - if (ORTE_SUCCESS != (rc = info->odti_print_fn(output, prefix, src, type))) { - ORTE_ERROR_LOG(rc); - } - - - return rc; -} - -/* - * STANDARD PRINT FUNCTIONS FOR SYSTEM TYPES - */ -int orte_dss_print_byte(char **output, char *prefix, uint8_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_BYTE\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_BYTE\tValue: %x", prefix, *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_string(char **output, char *prefix, char *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_STRING\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_STRING\tValue: %s", prefx, src); - - return ORTE_SUCCESS; -} - -int 
orte_dss_print_size(char **output, char *prefix, size_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_SIZE\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_SIZE\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_pid(char **output, char *prefix, pid_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_PID\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_PID\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_bool(char **output, char *prefix, bool *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_BOOL\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_BOOL\tValue: %s", prefx, *src ? 
"TRUE" : "FALSE"); - - return ORTE_SUCCESS; -} - -int orte_dss_print_int(char **output, char *prefix, int *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_INT\tValue: %ld", prefx, (long) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_uint(char **output, char *prefix, int *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_UINT\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_uint8(char **output, char *prefix, uint8_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT8\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_UINT8\tValue: %u", prefx, (unsigned int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_uint16(char **output, char *prefix, uint16_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT16\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: 
ORTE_UINT16\tValue: %u", prefx, (unsigned int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_uint32(char **output, char *prefix, uint32_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT32\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_UINT32\tValue: %u", prefx, (unsigned int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_int8(char **output, char *prefix, int8_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT8\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_INT8\tValue: %d", prefx, (int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_int16(char **output, char *prefix, int16_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT16\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_INT16\tValue: %d", prefx, (int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_int32(char **output, char *prefix, int32_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT32\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - 
asprintf(output, "%sData type: ORTE_INT32\tValue: %d", prefx, (int) *src); - - return ORTE_SUCCESS; -} -int orte_dss_print_uint64(char **output, char *prefix, -#ifdef HAVE_INT64_T - uint64_t *src, -#else - void *src, -#endif /* HAVE_INT64_T */ - orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT64\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - -#ifdef HAVE_INT64_T - asprintf(output, "%sData type: ORTE_UINT64\tValue: %lu", prefx, (unsigned long) *src); -#else - asprintf(output, "%sData type: ORTE_UINT64\tValue: unsupported", prefx); -#endif /* HAVE_INT64_T */ - - return ORTE_SUCCESS; -} - -int orte_dss_print_int64(char **output, char *prefix, -#ifdef HAVE_INT64_T - int64_t *src, -#else - void *src, -#endif /* HAVE_INT64_T */ - orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT64\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - -#ifdef HAVE_INT64_T - asprintf(output, "%sData type: ORTE_INT64\tValue: %ld", prefx, (long) *src); -#else - asprintf(output, "%sData type: ORTE_INT64\tValue: unsupported", prefx); -#endif /* HAVE_INT64_T */ - - return ORTE_SUCCESS; -} - -int orte_dss_print_null(char **output, char *prefix, void *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_NULL\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_NULL", prefx); - - return ORTE_SUCCESS; -} - - -/* 
PRINT FUNCTIONS FOR GENERIC ORTE TYPES */ -/* - * ORTE_STD_CNTR - */ -int orte_dss_print_std_cntr(char **output, char *prefix, orte_std_cntr_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_STD_CNTR\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_STD_CNTR\tValue: %lu", prefx, (unsigned long) *src); - return ORTE_SUCCESS; -} - -/* - * ORTE_DATA_TYPE - */ -int orte_dss_print_data_type(char **output, char *prefix, orte_data_type_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_DATA_TYPE\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_DATA_TYPE\tValue: %lu", prefx, (unsigned long) *src); - return ORTE_SUCCESS; -} - -/* - * ORTE_DATA_VALUE - */ -int orte_dss_print_data_value(char **output, char *prefix, orte_data_value_t *src, orte_data_type_t type) -{ - char *pfx, *tmp1, *tmp2; - int rc; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - if (NULL != prefix) { - asprintf(output, "%sData type: ORTE_DATA_VALUE\tValue: NULL pointer", prefix); - } else { - asprintf(output, "Data type: ORTE_DATA_VALUE\tValue: NULL pointer"); - } - return ORTE_SUCCESS; - } - - if (NULL != prefix) { - asprintf(&pfx, "%s\t", prefix); - asprintf(&tmp1, "%sData type: ORTE_DATA_VALUE:\n", prefix); - } else { - asprintf(&tmp1, "Data type: ORTE_DATA_VALUE:\n"); - asprintf(&pfx, "\t"); - } - - /* if data is included, print it */ - if (ORTE_UNDEF == src->type) { /* undefined data type - just report it */ - asprintf(&tmp2, "%sData type: 
ORTE_UNDEF\tValue: N/A", pfx); - } else if (NULL != src->data) { - if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->data, src->type))) { - ORTE_ERROR_LOG(rc); - if (NULL != tmp1) free(tmp1); - if (NULL != pfx) free(pfx); - *output = NULL; - return rc; - } - } else { /* indicate the data field was NULL */ - asprintf(&tmp2, "%sData field is NULL", pfx); - } - - asprintf(output, "%s%s", tmp1, tmp2); - free(tmp1); - free(tmp2); - if (NULL != pfx) free(pfx); - - return ORTE_SUCCESS; -} - -#if OPAL_ENABLE_FT == 1 -/* - * ORTE_CKPT_CMD - */ -int orte_dss_print_ckpt_cmd(char **output, char *prefix, size_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_CKPT_CMD\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_CKPT_CMD\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} -#endif - -/* - * ORTE_BYTE_OBJECT - */ -int orte_dss_print_byte_object(char **output, char *prefix, orte_byte_object_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_BYTE_OBJECT\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_BYTE_OBJECT\tSize: %lu", prefx, (unsigned long) src->size); - - return ORTE_SUCCESS; -} diff --git a/orte/dss/dss_register.c b/orte/dss/dss_register.c deleted file mode 100644 index 2d7ced194d..0000000000 --- a/orte/dss/dss_register.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" - -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - - -int orte_dss_register(orte_dss_pack_fn_t pack_fn, - orte_dss_unpack_fn_t unpack_fn, - orte_dss_copy_fn_t copy_fn, - orte_dss_compare_fn_t compare_fn, - orte_dss_size_fn_t size_fn, - orte_dss_print_fn_t print_fn, - orte_dss_release_fn_t release_fn, - bool structured, - const char *name, orte_data_type_t *type) -{ - int ret; - orte_dss_type_info_t *info, **ptr; - orte_std_cntr_t i; - orte_data_type_t j; - - /* Check for bozo cases */ - - if (NULL == pack_fn || NULL == unpack_fn || NULL == copy_fn || NULL == compare_fn || - NULL == size_fn || NULL == print_fn || NULL == name || NULL == type) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* check if this entry already exists - if so, error - we do NOT allow multiple type registrations */ - ptr = (orte_dss_type_info_t**)(orte_dss_types->addr); - for (i=0, j=0; j < orte_dss_num_reg_types && - i < orte_dss_types->size; i++) { - if (NULL != ptr[i]) { - j++; - /* check if the name exists */ - if (0 == strcmp(ptr[i]->odti_name, name)) { - ORTE_ERROR_LOG(ORTE_ERR_DATA_TYPE_REDEF); - return ORTE_ERR_DATA_TYPE_REDEF; - } - /* check if the specified type exists */ - if (*type > 0 && ptr[i]->odti_type == *type) { - ORTE_ERROR_LOG(ORTE_ERR_DATA_TYPE_REDEF); - return ORTE_ERR_DATA_TYPE_REDEF; - } - } - } - - /* if type is given (i.e., *type > 0), then just use it. 
- * otherwise, go and get a new type id from the name - * service - */ - if (0 >= *type) { - if (ORTE_SUCCESS != (ret = orte_ns.define_data_type(name, type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Add a new entry to the table */ - info = (orte_dss_type_info_t*) OBJ_NEW(orte_dss_type_info_t); - if (NULL == info) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - info->odti_type = *type; - info->odti_name = strdup(name); - info->odti_pack_fn = pack_fn; - info->odti_unpack_fn = unpack_fn; - info->odti_copy_fn = copy_fn; - info->odti_compare_fn = compare_fn; - info->odti_size_fn = size_fn; - info->odti_print_fn = print_fn; - info->odti_release_fn = release_fn; - info->odti_structured = structured; - if (ORTE_SUCCESS != (ret = orte_pointer_array_set_item(orte_dss_types, *type, info))) { - ORTE_ERROR_LOG(ret); - } - - /* All done */ - - return ret; -} diff --git a/orte/dss/dss_types.h b/orte/dss/dss_types.h deleted file mode 100644 index 85404befae..0000000000 --- a/orte/dss/dss_types.h +++ /dev/null @@ -1,109 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Buffer management types. 
- */ - -#ifndef ORTE_DSS_TYPES_H_ -#define ORTE_DSS_TYPES_H_ - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "opal/class/opal_object.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* define arithmetic operations for readability */ -typedef uint8_t orte_dss_arith_op_t; - -#define ORTE_DSS_ADD 1 -#define ORTE_DSS_SUB 2 -#define ORTE_DSS_MUL 3 -#define ORTE_DSS_DIV 4 - - -/* Data value object */ -typedef struct { - opal_object_t super; /* required for this to be an object */ - orte_data_type_t type; /* the type of value stored */ - void *data; -} orte_data_value_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_data_value_t); - -#define ORTE_DATA_VALUE_EMPTY { OPAL_OBJ_STATIC_INIT(orte_data_value_t), ORTE_UNDEF, NULL} - -/* structured-unstructured data flags */ -#define ORTE_DSS_STRUCTURED true -#define ORTE_DSS_UNSTRUCTURED false - -/** - * buffer type - */ -typedef uint8_t orte_dss_buffer_type_t; -#define ORTE_DSS_BUFFER_NON_DESC 0x00 -#define ORTE_DSS_BUFFER_FULLY_DESC 0x01 - -#define ORTE_DSS_BUFFER_TYPE_HTON(h); -#define ORTE_DSS_BUFFER_TYPE_NTOH(h); - -/** - * Structure for holding a buffer to be used with the RML or OOB - * subsystems. - */ - struct orte_buffer_t { - /** First member must be the object's parent */ - opal_object_t parent; - /** type of buffer */ - orte_dss_buffer_type_t type; - /** Start of my memory */ - char *base_ptr; - /** Where the next data will be packed to (within the allocated - memory starting at base_ptr) */ - char *pack_ptr; - /** Where the next data will be unpacked from (within the - allocated memory starting as base_ptr) */ - char *unpack_ptr; - - /** Number of bytes allocated (starting at base_ptr), - typically in multiples of orte_dps_pages, but may not be - if the buffer was initialized with orte_dps_load(). 
*/ - size_t bytes_allocated; - /** Number of bytes used by the buffer (i.e., amount of data -- - including overhead -- packed in the buffer) */ - size_t bytes_used; - }; - /** - * Convenience typedef - */ - typedef struct orte_buffer_t orte_buffer_t; - - /** formalize the declaration */ - ORTE_DECLSPEC OBJ_CLASS_DECLARATION (orte_buffer_t); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* ORTE_DSS_TYPES_H */ diff --git a/orte/dss/dss_unpack.c b/orte/dss/dss_unpack.c deleted file mode 100644 index 04d7f38d00..0000000000 --- a/orte/dss/dss_unpack.c +++ /dev/null @@ -1,617 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include -#ifdef HAVE_NETINET_IN_H -#include -#endif - -#include "opal/types.h" -#include "opal/util/output.h" -#include "opal/mca/backtrace/backtrace.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" - -int orte_dss_unpack(orte_buffer_t *buffer, void *dst, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int rc, ret; - orte_std_cntr_t local_num, n=1; - orte_data_type_t local_type; - - /* check for error */ - if (NULL == buffer || NULL == dst || NULL == num_vals) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* if user provides a zero for num_vals, then there is no storage allocated - * so return an appropriate error - */ - if (0 == *num_vals) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_INADEQUATE_SPACE); - return ORTE_ERR_UNPACK_INADEQUATE_SPACE; - } - - /** Unpack the declared number of values - * REMINDER: it is possible that the buffer is corrupted and that - * the DSS will *think* there is a proper orte_std_cntr_t variable at the - * beginning of the unpack region - but that the value is bogus (e.g., just - * a byte field in a string array that so happens to have a value that - * matches the orte_std_cntr_t data type flag). Therefore, this error check is - * NOT completely safe. This is true for ALL unpack functions, not just - * orte_std_cntr_t as used here. 
- */ - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - if (ORTE_SUCCESS != ( - rc = orte_dss_get_data_type(buffer, &local_type))) { - *num_vals = 0; - return rc; - } - if (ORTE_STD_CNTR != local_type) { /* if the length wasn't first, then error */ - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE); - *num_vals = 0; - return ORTE_ERR_UNPACK_FAILURE; - } - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss_unpack_std_cntr(buffer, &local_num, &n, ORTE_STD_CNTR))) { - *num_vals = 0; - return rc; - } - - /** if the storage provided is inadequate, set things up - * to unpack as much as we can and to return an error code - * indicating that everything was not unpacked - the buffer - * is left in a state where it can not be further unpacked. - */ - if (local_num > *num_vals) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_INADEQUATE_SPACE); - local_num = *num_vals; - ret = ORTE_ERR_UNPACK_INADEQUATE_SPACE; - } else { /** enough or more than enough storage */ - *num_vals = local_num; /** let the user know how many we actually unpacked */ - ret = ORTE_SUCCESS; - } - - /** Unpack the value(s) */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, dst, &local_num, type))) { - *num_vals = 0; - ret = rc; - } - - return ret; -} - -int orte_dss_unpack_buffer(orte_buffer_t *buffer, void *dst, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int rc; - orte_data_type_t local_type; - orte_dss_type_info_t *info; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_buffer( %p, %p, %lu, %d )\n", - (void*)buffer, dst, (long unsigned int)*num_vals, (int)type ) ); - - /** Unpack the declared data type */ - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - if (ORTE_SUCCESS != (rc = orte_dss_get_data_type(buffer, &local_type))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* if the data types don't match, then return an error */ - if (type != local_type) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_MISMATCH); - return ORTE_ERR_PACK_MISMATCH; - } - } - - /* Lookup the unpack function for this type and call it 
*/ - - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type))) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE); - return ORTE_ERR_UNPACK_FAILURE; - } - - rc = info->odti_unpack_fn(buffer, dst, num_vals, type); - return rc; -} - - -/* UNPACK GENERIC SYSTEM TYPES */ - -/* - * BOOL - */ -int orte_dss_unpack_bool(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_data_type_t remote_type; - - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - /* see what type was actually packed */ - if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - if (remote_type == DSS_TYPE_BOOL) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_BOOL))) { - ORTE_ERROR_LOG(ret); - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(bool, remote_type, ret); - } - return ret; -} - -/* - * INT - */ -int orte_dss_unpack_int(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_data_type_t remote_type; - - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - /* see what type was actually packed */ - if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - if (remote_type == DSS_TYPE_INT) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_INT))) { - ORTE_ERROR_LOG(ret); - } - } else { - /* slow 
path - types are different sizes */ - UNPACK_SIZE_MISMATCH(int, remote_type, ret); - } - - return ret; -} - -/* - * SIZE_T - */ -int orte_dss_unpack_sizet(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_data_type_t remote_type; - - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - /* see what type was actually packed */ - if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - if (remote_type == DSS_TYPE_SIZE_T) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_SIZE_T))) { - ORTE_ERROR_LOG(ret); - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(size_t, remote_type, ret); - } - - return ret; -} - -/* - * PID_T - */ -int orte_dss_unpack_pid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_data_type_t remote_type; - - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - /* see what type was actually packed */ - if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - if (remote_type == DSS_TYPE_PID_T) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_PID_T))) { - ORTE_ERROR_LOG(ret); - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(pid_t, remote_type, ret); - } - - return ret; -} - - -/* UNPACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ - -/* - * 
NULL - */ -int orte_dss_unpack_null(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_null * %d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, *num_vals)) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - memcpy(dest, buffer->unpack_ptr, *num_vals); - - /* update buffer pointer */ - buffer->unpack_ptr += *num_vals; - - return ORTE_SUCCESS; -} - -/* - * BYTE, CHAR, INT8 - */ -int orte_dss_unpack_byte(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_byte * %d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, *num_vals)) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - memcpy(dest, buffer->unpack_ptr, *num_vals); - - /* update buffer pointer */ - buffer->unpack_ptr += *num_vals; - - return ORTE_SUCCESS; -} - -int orte_dss_unpack_int16(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint16_t tmp, *desttmp = (uint16_t*) dest; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_int16 * %d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); - desttmp[i] = ntohs(tmp); - buffer->unpack_ptr += sizeof(tmp); - } - - return ORTE_SUCCESS; -} - -int orte_dss_unpack_int32(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint32_t tmp, *desttmp = (uint32_t*) dest; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_int32 * 
%d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); - desttmp[i] = ntohl(tmp); - buffer->unpack_ptr += sizeof(tmp); - } - - return ORTE_SUCCESS; -} - -int orte_dss_unpack_int64(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint64_t tmp, *desttmp = (uint64_t*) dest; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_int64 * %d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); - desttmp[i] = ntoh64(tmp); - buffer->unpack_ptr += sizeof(tmp); - } - - return ORTE_SUCCESS; -} - -int orte_dss_unpack_string(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_std_cntr_t i, len, n=1; - char **sdest = (char**) dest; - - for (i = 0; i < (*num_vals); ++i) { - if (ORTE_SUCCESS != (ret = orte_dss_unpack_std_cntr(buffer, &len, &n, ORTE_STD_CNTR))) { - return ret; - } - if (0 == len) { /* zero-length string - unpack the NULL */ - sdest[i] = NULL; - } else { - sdest[i] = (char*)malloc(len); - if (NULL == sdest[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (ret = orte_dss_unpack_byte(buffer, sdest[i], &len, ORTE_BYTE))) { - return ret; - } - } - } - - return ORTE_SUCCESS; -} - - -/* UNPACK FUNCTIONS FOR GENERIC ORTE TYPES */ - -/* - * ORTE_STD_CNTR - */ -int orte_dss_unpack_std_cntr(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int ret; - - 
/* turn around and unpack the real type */ - ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_STD_CNTR_T); - - return ret; -} - -/* - * ORTE_DATA_TYPE - */ -int orte_dss_unpack_data_type(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int ret; - - /* turn around and unpack the real type */ - ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_DATA_TYPE_T); - - return ret; -} - -#if OPAL_ENABLE_FT == 1 -/* - * ORTE_CKPT_CMD - */ -int orte_dss_unpack_ckpt_cmd(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num, - orte_data_type_t type) -{ - size_t required; - int rc; - - required = sizeof(size_t); - switch (required) { - - case 1: - if (ORTE_SUCCESS != ( - rc = orte_dss_unpack_byte(buffer, dest, num, ORTE_BYTE))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 2: - if (ORTE_SUCCESS != ( - rc = orte_dss_unpack_int16(buffer, dest, num, ORTE_INT16))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 4: - if (ORTE_SUCCESS != ( - rc = orte_dss_unpack_int32(buffer, dest, num, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 8: - if (ORTE_SUCCESS != ( - rc = orte_dss_unpack_int64(buffer, dest, num, ORTE_INT64))) { - ORTE_ERROR_LOG(rc); - } - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - return rc; -} -#endif - -/* - * ORTE_DATA_VALUE - */ -int orte_dss_unpack_data_value(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num, - orte_data_type_t type) -{ - orte_dss_type_info_t *info; - orte_data_value_t **ddv; - orte_std_cntr_t i, n; - orte_data_type_t dt; - size_t nsize; - int ret; - - ddv = (orte_data_value_t **) dest; - - for (i = 0; i < *num; ++i) { - /* see what the data type is */ - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &dt))) { - return ret; - } - - /* if it is ORTE_NULL, then do nothing */ - if (ORTE_NULL == dt) continue; - - /* otherwise, allocate the new object and set the type */ - - ddv[i] = OBJ_NEW(orte_data_value_t); 
- if (NULL == ddv[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - ddv[i]->type = dt; - - /* if it is UNDEF, then nothing more to do */ - if (ORTE_UNDEF == ddv[i]->type) continue; - - /* get enough memory to hold it */ - if (ORTE_SUCCESS != (ret = orte_dss.size(&nsize, NULL, ddv[i]->type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - ddv[i]->data = (void*)malloc(nsize); - if (NULL == ddv[i]->data) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* Lookup the unpack function for this type and call it */ - - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, ddv[i]->type))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; - } - - if (info->odti_structured) { - n=1; - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, &(ddv[i]->data), &n, ddv[i]->type))) { - return ret; - } - } else { - n=1; - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, ddv[i]->data, &n, ddv[i]->type))) { - return ret; - } - } - } - - return ORTE_SUCCESS; -} - - -/* - * ORTE_BYTE_OBJECT - */ -int orte_dss_unpack_byte_object(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num, - orte_data_type_t type) -{ - int ret; - orte_std_cntr_t i, n, m=1; - orte_byte_object_t **dbyteptr; - - dbyteptr = (orte_byte_object_t**)dest; - n = *num; - for(i=0; isize), &m, ORTE_STD_CNTR))) { - return ret; - } - if (0 < dbyteptr[i]->size) { - dbyteptr[i]->bytes = (uint8_t*)malloc(dbyteptr[i]->size); - if (NULL == dbyteptr[i]->bytes) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (ret = orte_dss_unpack_byte(buffer, (dbyteptr[i]->bytes), - &(dbyteptr[i]->size), ORTE_BYTE))) { - return ret; - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/include/orte/Makefile.am b/orte/include/orte/Makefile.am index dd7599d6bc..41db25cb15 100644 --- a/orte/include/orte/Makefile.am +++ 
b/orte/include/orte/Makefile.am @@ -20,8 +20,8 @@ # orte/include//Makefile.am headers += \ - orte/orte_constants.h \ - orte/orte_types.h + orte/constants.h \ + orte/types.h nodist_headers += \ orte/version.h diff --git a/orte/include/orte/orte_constants.h b/orte/include/orte/constants.h similarity index 59% rename from orte/include/orte/orte_constants.h rename to orte/include/orte/constants.h index 0059e451d1..eb072fced3 100644 --- a/orte/include/orte/orte_constants.h +++ b/orte/include/orte/constants.h @@ -22,16 +22,10 @@ #include "opal/constants.h" #include "orte_config.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS #define ORTE_ERR_BASE OPAL_ERR_MAX -/* define the results values for comparisons so we can change them in only one place */ -#define ORTE_VALUE1_GREATER +1 -#define ORTE_VALUE2_GREATER -1 -#define ORTE_EQUAL 0 enum { /* Error codes inherited from OPAL. Still enum values so that we @@ -60,7 +54,17 @@ enum { ORTE_ERR_FILE_READ_FAILURE = OPAL_ERR_FILE_READ_FAILURE, ORTE_ERR_FILE_WRITE_FAILURE = OPAL_ERR_FILE_WRITE_FAILURE, ORTE_ERR_FILE_OPEN_FAILURE = OPAL_ERR_FILE_OPEN_FAILURE, - + ORTE_ERR_PACK_MISMATCH = OPAL_ERR_PACK_MISMATCH, + ORTE_ERR_PACK_FAILURE = OPAL_ERR_PACK_FAILURE, + ORTE_ERR_UNPACK_FAILURE = OPAL_ERR_UNPACK_FAILURE, + ORTE_ERR_UNPACK_INADEQUATE_SPACE = OPAL_ERR_UNPACK_INADEQUATE_SPACE, + ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER = OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER, + ORTE_ERR_TYPE_MISMATCH = OPAL_ERR_TYPE_MISMATCH, + ORTE_ERR_OPERATION_UNSUPPORTED = OPAL_ERR_OPERATION_UNSUPPORTED, + ORTE_ERR_UNKNOWN_DATA_TYPE = OPAL_ERR_UNKNOWN_DATA_TYPE, + ORTE_ERR_BUFFER = OPAL_ERR_BUFFER, + ORTE_ERR_DATA_TYPE_REDEF = OPAL_ERR_DATA_TYPE_REDEF, + ORTE_ERR_DATA_OVERWRITE_ATTEMPT = OPAL_ERR_DATA_OVERWRITE_ATTEMPT, /* error codes specific to ORTE - don't forget to update orte/util/error_strings.c when adding new error codes!! 
Otherwise, the error reporting system will potentially crash, @@ -69,43 +73,32 @@ enum { ORTE_ERR_RECV_LESS_THAN_POSTED = (ORTE_ERR_BASE - 1), ORTE_ERR_RECV_MORE_THAN_POSTED = (ORTE_ERR_BASE - 2), ORTE_ERR_NO_MATCH_YET = (ORTE_ERR_BASE - 3), - ORTE_ERR_BUFFER = (ORTE_ERR_BASE - 4), - ORTE_ERR_REQUEST = (ORTE_ERR_BASE - 5), - ORTE_ERR_NO_CONNECTION_ALLOWED = (ORTE_ERR_BASE - 6), - ORTE_ERR_CONNECTION_REFUSED = (ORTE_ERR_BASE - 7), - ORTE_ERR_CONNECTION_FAILED = (ORTE_ERR_BASE - 8), - ORTE_ERR_PACK_MISMATCH = (ORTE_ERR_BASE - 9), - ORTE_ERR_PACK_FAILURE = (ORTE_ERR_BASE - 10), - ORTE_ERR_UNPACK_FAILURE = (ORTE_ERR_BASE - 11), - ORTE_ERR_COMM_FAILURE = (ORTE_ERR_BASE - 12), - ORTE_ERR_UNPACK_INADEQUATE_SPACE = (ORTE_ERR_BASE - 13), - ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER = (ORTE_ERR_BASE - 14), - ORTE_ERR_GPR_DATA_CORRUPT = (ORTE_ERR_BASE - 15), - ORTE_ERR_TYPE_MISMATCH = (ORTE_ERR_BASE - 16), - ORTE_ERR_COMPARE_FAILURE = (ORTE_ERR_BASE - 17), - ORTE_ERR_COPY_FAILURE = (ORTE_ERR_BASE - 18), - ORTE_ERR_UNKNOWN_DATA_TYPE = (ORTE_ERR_BASE - 19), - ORTE_ERR_DATA_TYPE_REDEF = (ORTE_ERR_BASE - 20), - ORTE_ERR_DATA_OVERWRITE_ATTEMPT = (ORTE_ERR_BASE - 21), - ORTE_ERR_OPERATION_UNSUPPORTED = (ORTE_ERR_BASE - 22), - ORTE_ERR_PROC_STATE_MISSING = (ORTE_ERR_BASE - 23), - ORTE_ERR_PROC_EXIT_STATUS_MISSING = (ORTE_ERR_BASE - 24), - ORTE_ERR_INDETERMINATE_STATE_INFO = (ORTE_ERR_BASE - 25), - ORTE_ERR_NODE_FULLY_USED = (ORTE_ERR_BASE - 26), - ORTE_ERR_INVALID_NUM_PROCS = (ORTE_ERR_BASE - 27), - ORTE_ERR_SILENT = (ORTE_ERR_BASE - 28), - ORTE_ERR_ADDRESSEE_UNKNOWN = (ORTE_ERR_BASE - 29), - ORTE_ERR_SYS_LIMITS_PIPES = (ORTE_ERR_BASE - 30), - ORTE_ERR_PIPE_SETUP_FAILURE = (ORTE_ERR_BASE - 31), - ORTE_ERR_SYS_LIMITS_CHILDREN = (ORTE_ERR_BASE - 32), - ORTE_ERR_FAILED_GET_TERM_ATTRS = (ORTE_ERR_BASE - 33), - ORTE_ERR_WDIR_NOT_FOUND = (ORTE_ERR_BASE - 34), - ORTE_ERR_EXE_NOT_FOUND = (ORTE_ERR_BASE - 35), - ORTE_ERR_PIPE_READ_FAILURE = (ORTE_ERR_BASE - 36), - ORTE_ERR_EXE_NOT_ACCESSIBLE = 
(ORTE_ERR_BASE - 37), - ORTE_ERR_FAILED_TO_START = (ORTE_ERR_BASE - 38), - ORTE_ERR_FILE_NOT_EXECUTABLE = (ORTE_ERR_BASE - 39), - ORTE_ERR_HNP_COULD_NOT_START = (ORTE_ERR_BASE - 40) + ORTE_ERR_REQUEST = (ORTE_ERR_BASE - 4), + ORTE_ERR_NO_CONNECTION_ALLOWED = (ORTE_ERR_BASE - 5), + ORTE_ERR_CONNECTION_REFUSED = (ORTE_ERR_BASE - 6), + ORTE_ERR_CONNECTION_FAILED = (ORTE_ERR_BASE - 7), + ORTE_ERR_COMM_FAILURE = (ORTE_ERR_BASE - 8), + ORTE_ERR_GPR_DATA_CORRUPT = (ORTE_ERR_BASE - 9), + ORTE_ERR_COMPARE_FAILURE = (ORTE_ERR_BASE - 10), + ORTE_ERR_COPY_FAILURE = (ORTE_ERR_BASE - 11), + ORTE_ERR_PROC_STATE_MISSING = (ORTE_ERR_BASE - 12), + ORTE_ERR_PROC_EXIT_STATUS_MISSING = (ORTE_ERR_BASE - 13), + ORTE_ERR_INDETERMINATE_STATE_INFO = (ORTE_ERR_BASE - 14), + ORTE_ERR_NODE_FULLY_USED = (ORTE_ERR_BASE - 15), + ORTE_ERR_INVALID_NUM_PROCS = (ORTE_ERR_BASE - 16), + ORTE_ERR_SILENT = (ORTE_ERR_BASE - 17), + ORTE_ERR_ADDRESSEE_UNKNOWN = (ORTE_ERR_BASE - 18), + ORTE_ERR_SYS_LIMITS_PIPES = (ORTE_ERR_BASE - 19), + ORTE_ERR_PIPE_SETUP_FAILURE = (ORTE_ERR_BASE - 20), + ORTE_ERR_SYS_LIMITS_CHILDREN = (ORTE_ERR_BASE - 21), + ORTE_ERR_FAILED_GET_TERM_ATTRS = (ORTE_ERR_BASE - 22), + ORTE_ERR_WDIR_NOT_FOUND = (ORTE_ERR_BASE - 23), + ORTE_ERR_EXE_NOT_FOUND = (ORTE_ERR_BASE - 24), + ORTE_ERR_PIPE_READ_FAILURE = (ORTE_ERR_BASE - 25), + ORTE_ERR_EXE_NOT_ACCESSIBLE = (ORTE_ERR_BASE - 26), + ORTE_ERR_FAILED_TO_START = (ORTE_ERR_BASE - 27), + ORTE_ERR_FILE_NOT_EXECUTABLE = (ORTE_ERR_BASE - 28), + ORTE_ERR_HNP_COULD_NOT_START = (ORTE_ERR_BASE - 29) }; #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) @@ -113,9 +106,7 @@ enum { /* include the prototype for the error-to-string converter */ ORTE_DECLSPEC const char* orte_err2str(int errnum); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif /* ORTE_CONSTANTS_H */ diff --git a/orte/include/orte/orte_types.h b/orte/include/orte/orte_types.h deleted file mode 100644 index 2fd97a906f..0000000000 --- a/orte/include/orte/orte_types.h 
+++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef ORTE_TYPES_H -#define ORTE_TYPES_H - -#include "orte_config.h" - -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -/** - * Supported datatypes for messaging and storage operations. - */ - -typedef uint8_t orte_data_type_t; /** data type indicators used in ORTE */ -#define ORTE_DATA_TYPE_T ORTE_UINT8 -#define ORTE_DSS_ID_MAX UINT8_MAX -#define ORTE_DSS_ID_INVALID ORTE_DSS_ID_MAX - -typedef int32_t orte_std_cntr_t; /** standard counters used in ORTE */ -#define ORTE_STD_CNTR_T ORTE_INT32 -#define ORTE_STD_CNTR_MAX INT32_MAX -#define ORTE_STD_CNTR_MIN INT32_MIN -#define ORTE_STD_CNTR_INVALID -1 - -/* define a structure to hold generic byte objects */ -typedef struct { - orte_std_cntr_t size; - uint8_t *bytes; -} orte_byte_object_t; - -/** - * handle differences in iovec - */ - -#if defined(__APPLE__) || defined(__WINDOWS__) -typedef char* orte_iov_base_ptr_t; -#else -typedef void* orte_iov_base_ptr_t; -#endif - - -#define ORTE_UNDEF (orte_data_type_t) 0 /**< type hasn't been defined yet */ -#define ORTE_BYTE (orte_data_type_t) 1 /**< a byte of data */ -#define ORTE_BOOL (orte_data_type_t) 2 /**< boolean */ -#define ORTE_STRING (orte_data_type_t) 3 /**< a NULL terminated string */ -#define ORTE_SIZE (orte_data_type_t) 4 /**< the generic size_t */ -#define ORTE_PID (orte_data_type_t) 5 /**< process pid */ - /* all the integer flavors 
*/ -#define ORTE_INT (orte_data_type_t) 6 /**< generic integer */ -#define ORTE_INT8 (orte_data_type_t) 7 /**< an 8-bit integer */ -#define ORTE_INT16 (orte_data_type_t) 8 /**< a 16-bit integer */ -#define ORTE_INT32 (orte_data_type_t) 9 /**< a 32-bit integer */ -#define ORTE_INT64 (orte_data_type_t) 10 /**< a 64-bit integer */ - /* all the unsigned integer flavors */ -#define ORTE_UINT (orte_data_type_t) 11 /**< generic unsigned integer */ -#define ORTE_UINT8 (orte_data_type_t) 12 /**< an 8-bit unsigned integer */ -#define ORTE_UINT16 (orte_data_type_t) 13 /**< a 16-bit unsigned integer */ -#define ORTE_UINT32 (orte_data_type_t) 14 /**< a 32-bit unsigned integer */ -#define ORTE_UINT64 (orte_data_type_t) 15 /**< a 64-bit unsigned integer */ - - /* we don't support floating point types */ - - /* orte-specific typedefs - grouped according to the subystem that handles - * their packing/unpacking */ - /* General types - packing/unpacking handled within DSS */ -#define ORTE_BYTE_OBJECT (orte_data_type_t) 16 /**< byte object structure */ -#define ORTE_DATA_TYPE (orte_data_type_t) 17 /**< data type */ -#define ORTE_NULL (orte_data_type_t) 18 /**< don't interpret data type */ -#define ORTE_DATA_VALUE (orte_data_type_t) 19 /**< data value */ -#define ORTE_ARITH_OP (orte_data_type_t) 20 /**< arithmetic operation flag */ -#define ORTE_STD_CNTR (orte_data_type_t) 21 /**< standard counter type */ - /* Name Service types */ -#define ORTE_NAME (orte_data_type_t) 22 /**< an orte_process_name_t */ -#define ORTE_VPID (orte_data_type_t) 23 /**< a vpid */ -#define ORTE_JOBID (orte_data_type_t) 24 /**< a jobid */ -#define ORTE_NODEID (orte_data_type_t) 25 /**< a node id */ - /* SMR types */ -#define ORTE_NODE_STATE (orte_data_type_t) 26 /**< node status flag */ -#define ORTE_PROC_STATE (orte_data_type_t) 27 /**< process/resource status */ -#define ORTE_JOB_STATE (orte_data_type_t) 28 /**< job status flag */ -#define ORTE_EXIT_CODE (orte_data_type_t) 29 /**< process exit code */ - /* 
GPR types */ -#define ORTE_GPR_KEYVAL (orte_data_type_t) 30 /**< registry key-value pair */ -#define ORTE_GPR_NOTIFY_ACTION (orte_data_type_t) 31 /**< registry notify action */ -#define ORTE_GPR_TRIGGER_ACTION (orte_data_type_t) 32 /**< registry trigger action */ -#define ORTE_GPR_CMD (orte_data_type_t) 33 /**< registry command */ -#define ORTE_GPR_SUBSCRIPTION_ID (orte_data_type_t) 34 /**< registry notify id tag */ -#define ORTE_GPR_TRIGGER_ID (orte_data_type_t) 35 /**< registry notify id tag */ -#define ORTE_GPR_VALUE (orte_data_type_t) 36 /**< registry return value */ -#define ORTE_GPR_ADDR_MODE (orte_data_type_t) 37 /**< Addressing mode for registry cmds */ -#define ORTE_GPR_SUBSCRIPTION (orte_data_type_t) 38 /**< describes data returned by subscription */ -#define ORTE_GPR_TRIGGER (orte_data_type_t) 39 /**< describes trigger conditions */ -#define ORTE_GPR_NOTIFY_DATA (orte_data_type_t) 40 /**< data returned from a subscription */ -#define ORTE_GPR_NOTIFY_MSG (orte_data_type_t) 41 /**< notify message containing notify_data objects */ -#define ORTE_GPR_NOTIFY_MSG_TYPE (orte_data_type_t) 42 /**< notify message type (subscription or trigger) */ -#define ORTE_GPR_SEARCH (orte_data_type_t) 43 /**< search criteria */ -#define ORTE_GPR_UPDATE (orte_data_type_t) 44 /**< update data on the registry */ -/* Resource Manager types */ -#define ORTE_APP_CONTEXT (orte_data_type_t) 45 /**< argv and enviro arrays */ -#define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 46 /**< application context mapping array */ -#define ORTE_NODE_DESC (orte_data_type_t) 47 /**< describes capabilities of nodes */ -#define ORTE_SLOT_DESC (orte_data_type_t) 48 /**< describes slot allocations/reservations */ -#define ORTE_RAS_NODE (orte_data_type_t) 49 /**< node information */ -#define ORTE_JOB_MAP (orte_data_type_t) 50 /**< map of process locations */ -#define ORTE_MAPPED_PROC (orte_data_type_t) 51 /**< process entry on map */ -#define ORTE_MAPPED_NODE (orte_data_type_t) 52 /**< node entry on map */ 
-#define ORTE_ATTRIBUTE (orte_data_type_t) 53 /**< attribute used to control framework behavior */ -#define ORTE_ATTR_LIST (orte_data_type_t) 54 /**< list of attributes */ -/* RML types */ -#define ORTE_RML_TAG (orte_data_type_t) 55 /**< tag for sending/receiving messages */ - -/* DAEMON communication type */ -#define ORTE_DAEMON_CMD (orte_data_type_t) 56 /**< command flag for communicating with the daemon */ - -/* Need a command separate from ORTE_DAEMON_CMD, so that we can receive on - * them both at the same time */ -#define ORTE_CKPT_CMD (orte_data_type_t) 61 /**< command flag for communicating with HNP */ -/* define the starting point for dynamically assigning data types */ -#define ORTE_DSS_ID_DYNAMIC 70 - -#endif diff --git a/orte/include/orte/types.h b/orte/include/orte/types.h new file mode 100644 index 0000000000..29b1aaffbc --- /dev/null +++ b/orte/include/orte/types.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file */ + +#ifndef ORTE_TYPES_H +#define ORTE_TYPES_H + +#include "orte_config.h" + +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#include "opal/dss/dss_types.h" + +/** + * Supported datatypes for messaging and storage operations. 
+ */ + +typedef int32_t orte_std_cntr_t; /** standard counters used in ORTE */ +#define ORTE_STD_CNTR_T OPAL_INT32 +#define ORTE_STD_CNTR_MAX INT32_MAX +#define ORTE_STD_CNTR_MIN INT32_MIN +#define ORTE_STD_CNTR_INVALID -1 + +/* + * general typedefs & structures + */ +/** Set the allowed range for ids in each space + * + * NOTE: Be sure to update the ORTE_NAME_ARGS #define (above) and all + * uses of it if these types change to be larger than (long)! The + * HTON and NTOH macros below must be updated, as well as the MIN / + * MAX macros below and the datatype packing representations in + * orte/mca/plm/base/plm_private.h + * + * NOTE: Be sure to keep the jobid and vpid types the same size! Due + * to padding rules, it won't save anything to have one larger than + * the other, and it will cause problems in the communication subsystems + */ + +typedef uint32_t orte_jobid_t; +#define ORTE_JOBID_T OPAL_UINT32 +#define ORTE_JOBID_MAX UINT32_MAX-2 +#define ORTE_JOBID_MIN 0 +typedef uint32_t orte_vpid_t; +#define ORTE_VPID_T OPAL_UINT32 +#define ORTE_VPID_MAX UINT32_MAX-2 +#define ORTE_VPID_MIN 0 + +#define ORTE_PROCESS_NAME_HTON(n) \ +do { \ + n.jobid = htonl(n.jobid); \ + n.vpid = htonl(n.vpid); \ +} while (0) + +#define ORTE_PROCESS_NAME_NTOH(n) \ +do { \ + n.jobid = ntohl(n.jobid); \ + n.vpid = ntohl(n.vpid); \ +} while (0) + +#define ORTE_NAME_ARGS(n) \ + (unsigned long) ((NULL == n) ? (unsigned long)ORTE_JOBID_INVALID : (unsigned long)(n)->jobid), \ + (unsigned long) ((NULL == n) ? 
(unsigned long)ORTE_VPID_INVALID : (unsigned long)(n)->vpid) + +/* + * define invalid values + */ +#define ORTE_JOBID_INVALID (ORTE_JOBID_MAX + 2) +#define ORTE_VPID_INVALID (ORTE_VPID_MAX + 2) + +/* + * define wildcard values + */ +#define ORTE_JOBID_WILDCARD (ORTE_JOBID_MAX + 1) +#define ORTE_VPID_WILDCARD (ORTE_VPID_MAX + 1) + +/* + * define the process name structure + */ +struct orte_process_name_t { + orte_jobid_t jobid; /**< Job number */ + orte_vpid_t vpid; /**< Process id - equivalent to rank */ +}; +typedef struct orte_process_name_t orte_process_name_t; + +/* + * define a generic id for nodes + */ +typedef int32_t orte_nodeid_t; +#define ORTE_NODEID OPAL_INT32 +#define ORTE_NODEID_WILDCARD -1 +#define ORTE_NODEID_INVALID INT32_MIN + + +/** + * handle differences in iovec + */ + +#if defined(__APPLE__) || defined(__WINDOWS__) +typedef char* orte_iov_base_ptr_t; +#else +typedef void* orte_iov_base_ptr_t; +#endif + + +/* General ORTE types - support handled within DSS */ +#define ORTE_STD_CNTR (OPAL_DSS_ID_DYNAMIC + 1) /**< standard counter type */ +/* PLM types */ + /* Name-related types */ +#define ORTE_NAME (OPAL_DSS_ID_DYNAMIC + 2) /**< an orte_process_name_t */ +#define ORTE_VPID (OPAL_DSS_ID_DYNAMIC + 3) /**< a vpid */ +#define ORTE_JOBID (OPAL_DSS_ID_DYNAMIC + 4) /**< a jobid */ + /* State-related types */ +#define ORTE_NODE_STATE (OPAL_DSS_ID_DYNAMIC + 5) /**< node status flag */ +#define ORTE_PROC_STATE (OPAL_DSS_ID_DYNAMIC + 6) /**< process/resource status */ +#define ORTE_JOB_STATE (OPAL_DSS_ID_DYNAMIC + 7) /**< job status flag */ +#define ORTE_EXIT_CODE (OPAL_DSS_ID_DYNAMIC + 8) /**< process exit code */ + /* Data-passing types */ +#define ORTE_VALUE (OPAL_DSS_ID_DYNAMIC + 9) /**< registry return value */ + /* Resource types */ +#define ORTE_APP_CONTEXT (OPAL_DSS_ID_DYNAMIC + 10) /**< argv and enviro arrays */ +#define ORTE_APP_CONTEXT_MAP (OPAL_DSS_ID_DYNAMIC + 11) /**< application context mapping array */ +#define ORTE_NODE_DESC 
(OPAL_DSS_ID_DYNAMIC + 12) /**< describes capabilities of nodes */ +#define ORTE_SLOT_DESC (OPAL_DSS_ID_DYNAMIC + 13) /**< describes slot allocations/reservations */ +#define ORTE_JOB (OPAL_DSS_ID_DYNAMIC + 14) /**< job information */ +#define ORTE_NODE (OPAL_DSS_ID_DYNAMIC + 15) /**< node information */ +#define ORTE_PROC (OPAL_DSS_ID_DYNAMIC + 16) /**< process information */ +#define ORTE_JOB_MAP (OPAL_DSS_ID_DYNAMIC + 17) /**< map of process locations */ + +/* RML types */ +#define ORTE_RML_TAG (OPAL_DSS_ID_DYNAMIC + 18) /**< tag for sending/receiving messages */ + +/* DAEMON command type */ +#define ORTE_DAEMON_CMD (OPAL_DSS_ID_DYNAMIC + 19) /**< command flag for communicating with the daemon */ + +/* GRPCOMM types */ +#define ORTE_GRPCOMM_MODE (OPAL_DSS_ID_DYNAMIC + 20) + +#endif diff --git a/orte/mca/errmgr/base/Makefile.am b/orte/mca/errmgr/base/Makefile.am index 04be5aae25..7a8dbabb5b 100644 --- a/orte/mca/errmgr/base/Makefile.am +++ b/orte/mca/errmgr/base/Makefile.am @@ -22,7 +22,6 @@ headers += \ libmca_errmgr_la_SOURCES += \ base/errmgr_base_close.c \ - base/errmgr_base_receive.c \ base/errmgr_base_select.c \ base/errmgr_base_open.c \ base/errmgr_base_fns.c diff --git a/orte/mca/errmgr/base/base.h b/orte/mca/errmgr/base/base.h index 0bf0b1c6d4..3ff4fe50d6 100644 --- a/orte/mca/errmgr/base/base.h +++ b/orte/mca/errmgr/base/base.h @@ -25,12 +25,11 @@ * includes */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/class/opal_list.h" #include "opal/mca/mca.h" -#include "orte/mca/ns/ns_types.h" #include "orte/mca/errmgr/errmgr.h" @@ -53,14 +52,11 @@ ORTE_DECLSPEC int orte_errmgr_base_close(void); * globals that might be needed */ -ORTE_DECLSPEC extern int orte_errmgr_base_output; extern bool orte_errmgr_base_selected; extern bool orte_errmgr_initialized; ORTE_DECLSPEC extern opal_list_t orte_errmgr_base_components_available; ORTE_DECLSPEC extern mca_errmgr_base_component_t 
orte_errmgr_base_selected_component; -/* make the default module available so that close can use it */ -ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr_default; /* * external API functions will be documented in the mca/errmgr/errmgr.h file */ diff --git a/orte/mca/errmgr/base/errmgr_base_close.c b/orte/mca/errmgr/base/errmgr_base_close.c index bc52a3f390..4239704b40 100644 --- a/orte/mca/errmgr/base/errmgr_base_close.c +++ b/orte/mca/errmgr/base/errmgr_base_close.c @@ -17,40 +17,38 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include -#include "orte/orte_constants.h" #include "opal/util/trace.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/base/base.h" +#include "orte/mca/errmgr/base/errmgr_private.h" int orte_errmgr_base_close(void) { OPAL_TRACE(5); - /* If we have a selected component and module, then finalize it */ - - if (orte_errmgr_base_selected) { - orte_errmgr_base_selected_component.errmgr_finalize(); - } - - /* Close all remaining available components (may be one if this is a - OMPI RTE program, or [possibly] multiple if this is ompi_info) */ - - mca_base_components_close(orte_errmgr_base_output, - &orte_errmgr_base_components_available, NULL); - + /* If we have a selected component and module, then finalize it */ + + if (orte_errmgr_base_selected) { + orte_errmgr_base_selected_component.errmgr_finalize(); + } + + /* Close all remaining available components (may be one if this is a + OMPI RTE program, or [possibly] multiple if this is ompi_info) */ + + mca_base_components_close(orte_errmgr_base_output, + &orte_errmgr_base_components_available, NULL); + orte_errmgr_initialized = false; - /* set the module back to the default so that error logging can continue */ - orte_errmgr = orte_errmgr_default; + /* All done */ - /* All done */ - - return ORTE_SUCCESS; + return ORTE_SUCCESS; } diff --git a/orte/mca/errmgr/base/errmgr_base_fns.c 
b/orte/mca/errmgr/base/errmgr_base_fns.c index 3c73ccd88f..b71000aa58 100644 --- a/orte/mca/errmgr/base/errmgr_base_fns.c +++ b/orte/mca/errmgr/base/errmgr_base_fns.c @@ -18,16 +18,21 @@ #include "orte_config.h" +#include "orte/constants.h" + #ifdef HAVE_UNISTD_H #include #endif #include -#include "orte/orte_constants.h" #include "opal/util/output.h" #include "opal/util/trace.h" -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" +#include "opal/util/error.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/util/session_dir.h" +#include "orte/mca/ess/ess.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/base/errmgr_private.h" @@ -43,39 +48,45 @@ void orte_errmgr_base_log(int error_code, char *filename, int line) } opal_output(0, "%s ORTE_ERROR_LOG: %s in file %s at line %d", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(error_code), filename, line); } -int orte_errmgr_base_proc_aborted_not_avail(orte_gpr_notify_message_t *msg) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -int orte_errmgr_base_incomplete_start_not_avail(orte_gpr_notify_message_t *msgb) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -void orte_errmgr_base_error_detected(int error_code, char *fmt, ...) -{ - /* we can't know if any output is available yet, so - * we just exit */ - exit(error_code); -} - -void orte_errmgr_base_abort(void) -{ - /* guess we should exit */ - exit(-1); -} - -int orte_errmgr_base_register_job_not_avail(orte_jobid_t job) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -int orte_errmgr_base_abort_procs_request_not_avail(orte_process_name_t *procs, orte_std_cntr_t num_procs) +void orte_errmgr_base_proc_aborted_not_avail(orte_process_name_t *name, int exit_code) +{ + return; +} + +void orte_errmgr_base_incomplete_start_not_avail(orte_jobid_t job, int exit_code) +{ + return; +} + +void orte_errmgr_base_error_abort(int error_code, char *fmt, ...) 
+{ + va_list arglist; + + /* If there was a message, output it */ + va_start(arglist, fmt); + if( NULL != fmt ) { + char* buffer = NULL; + vasprintf( &buffer, fmt, arglist ); + opal_output( 0, buffer ); + free( buffer ); + } + va_end(arglist); + + /* cleanup my session directory */ + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* abnormal exit */ + orte_ess.abort(error_code, false); +} + +int orte_errmgr_base_register_cb_not_avail(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata) { return ORTE_ERR_NOT_AVAILABLE; } diff --git a/orte/mca/errmgr/base/errmgr_base_open.c b/orte/mca/errmgr/base/errmgr_base_open.c index 1f4e65533b..06609cb5a6 100644 --- a/orte/mca/errmgr/base/errmgr_base_open.c +++ b/orte/mca/errmgr/base/errmgr_base_open.c @@ -18,7 +18,7 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" @@ -47,20 +47,17 @@ */ int orte_errmgr_base_output = -1; /* - * we must define a default module so that the error logging - * functions can be available as early as possible + * define a default module that all application procs + * can use without having to open the framework. 
The + * decision on whether or not to open the framework is + * made in orte_init */ -orte_errmgr_base_module_t orte_errmgr_default = { - orte_errmgr_base_log, +orte_errmgr_base_module_t orte_errmgr = { orte_errmgr_base_proc_aborted_not_avail, orte_errmgr_base_incomplete_start_not_avail, - orte_errmgr_base_error_detected, - orte_errmgr_base_register_job_not_avail, - orte_errmgr_base_abort, - orte_errmgr_base_abort_procs_request_not_avail + orte_errmgr_base_register_cb_not_avail, + orte_errmgr_base_error_abort }; -/* start out with a default module */ -orte_errmgr_base_module_t orte_errmgr; bool orte_errmgr_base_selected = false; opal_list_t orte_errmgr_base_components_available; @@ -90,9 +87,6 @@ int orte_errmgr_base_open(void) orte_errmgr_base_output = -1; } - /* set the default module */ - orte_errmgr = orte_errmgr_default; - /* Open up all available components */ if (ORTE_SUCCESS != diff --git a/orte/mca/errmgr/base/errmgr_base_receive.c b/orte/mca/errmgr/base/errmgr_base_receive.c deleted file mode 100644 index 4800c486e1..0000000000 --- a/orte/mca/errmgr/base/errmgr_base_receive.c +++ /dev/null @@ -1,180 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/dss/dss.h" -#include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/errmgr/base/errmgr_private.h" - -static bool recv_issued=false; - -int orte_errmgr_base_comm_start(void) -{ - int rc; - - if (recv_issued) { - return ORTE_SUCCESS; - } - - if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_ERRMGR, - ORTE_RML_PERSISTENT, - orte_errmgr_base_recv, - NULL))) { - ORTE_ERROR_LOG(rc); - } - recv_issued = true; - - return rc; -} - -int orte_errmgr_base_comm_stop(void) -{ - int rc; - - if (!recv_issued) { - return ORTE_SUCCESS; - } - - if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ERRMGR))) { - ORTE_ERROR_LOG(rc); - } - recv_issued = false; - - return rc; -} - - - -/* - * handle message from proxies - * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. 
- * DO NOT RELEASE THIS BUFFER IN THIS CODE - */ - -void orte_errmgr_base_recv(int status, orte_process_name_t* sender, - orte_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_buffer_t answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count, nprocs; - orte_process_name_t *procs; - orte_jobid_t jobid; - int rc; - - OPAL_TRACE(2); - - /* get the command */ - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* setup to return an answer */ - OBJ_CONSTRUCT(&answer, orte_buffer_t); - - /* pack the command in the answer - this is done to allow the caller to check - * that we are talking about the same command - */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - return; - } - - switch (command) { - case ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD: - /* get the number of processes */ - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &nprocs, &count, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - /* get the required space */ - procs = (orte_process_name_t*)malloc(nprocs * sizeof(orte_process_name_t)); - if (NULL == procs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - goto SEND_ANSWER; - } - - /* unpack the array of process names */ - count = nprocs; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, procs, &count, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - /* if we didn't get the number we requested, then something is wrong */ - if (count != nprocs) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto SEND_ANSWER; - } - - /* process the request */ - if (ORTE_SUCCESS != (rc = orte_errmgr.abort_procs_request(procs, nprocs))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - break; - - case ORTE_ERRMGR_REGISTER_JOB_CMD: - /* register the job to monitor for alerts */ - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &jobid, &count, 
ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - /* process the request */ - if (ORTE_SUCCESS != (rc = orte_errmgr.register_job(jobid))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); - } - -SEND_ANSWER: - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - } - - /* cleanup */ - OBJ_DESTRUCT(&answer); -} - diff --git a/orte/mca/errmgr/base/errmgr_base_select.c b/orte/mca/errmgr/base/errmgr_base_select.c index d808fbed78..73d294d7d7 100644 --- a/orte/mca/errmgr/base/errmgr_base_select.c +++ b/orte/mca/errmgr/base/errmgr_base_select.c @@ -35,7 +35,6 @@ int orte_errmgr_base_select(void) mca_base_component_list_item_t *cli; mca_errmgr_base_component_t *component, *best_component = NULL; orte_errmgr_base_module_t *module, *best_module = NULL; - bool multi, hidden; int priority, best_priority = -1; /* Iterate through all the available components */ @@ -49,7 +48,7 @@ int orte_errmgr_base_select(void) /* Call the component's init function and see if it wants to be selected */ - module = component->errmgr_init(&multi, &hidden, &priority); + module = component->errmgr_init(&priority); /* If we got a non-NULL module back, then the component wants to be selected. 
So save its multi/hidden values and save the diff --git a/orte/mca/errmgr/base/errmgr_private.h b/orte/mca/errmgr/base/errmgr_private.h index 2c1d0d4322..5a439f35ef 100644 --- a/orte/mca/errmgr/base/errmgr_private.h +++ b/orte/mca/errmgr/base/errmgr_private.h @@ -25,35 +25,32 @@ * includes */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" +#include "orte/types.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/rml/rml.h" +#include "opal/dss/dss_types.h" +#include "orte/mca/rml/rml_types.h" +#include "orte/mca/plm/plm_types.h" + +#include "orte/mca/errmgr/errmgr.h" /* * Functions for use solely within the ERRMGR framework */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /* Define the ERRMGR command flag */ typedef uint8_t orte_errmgr_cmd_flag_t; -#define ORTE_ERRMGR_CMD ORTE_UINT8 +#define ORTE_ERRMGR_CMD OPAL_UINT8 /* define some commands */ #define ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD 0x01 -#define ORTE_ERRMGR_REGISTER_JOB_CMD 0x02 +#define ORTE_ERRMGR_REGISTER_CALLBACK_CMD 0x02 -/* Internal support */ -ORTE_DECLSPEC int orte_errmgr_base_comm_start(void); -ORTE_DECLSPEC int orte_errmgr_base_comm_stop(void); -void orte_errmgr_base_recv(int status, orte_process_name_t* sender, - orte_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); - +/* provide access to verbose output channel */ +ORTE_DECLSPEC extern int orte_errmgr_base_output; + /* * Base functions @@ -61,23 +58,20 @@ void orte_errmgr_base_recv(int status, orte_process_name_t* sender, ORTE_DECLSPEC void orte_errmgr_base_log(int error_code, char *filename, int line); -ORTE_DECLSPEC int orte_errmgr_base_proc_aborted_not_avail(orte_gpr_notify_message_t *msg); +ORTE_DECLSPEC void orte_errmgr_base_proc_aborted_not_avail(orte_process_name_t *name, int exit_code); -ORTE_DECLSPEC int orte_errmgr_base_incomplete_start_not_avail(orte_gpr_notify_message_t *msg); +ORTE_DECLSPEC void 
orte_errmgr_base_incomplete_start_not_avail(orte_jobid_t job, int exit_code); -ORTE_DECLSPEC void orte_errmgr_base_error_detected(int error_code, char *fmt, ...) __opal_attribute_format__(__printf__, 2, 3); +ORTE_DECLSPEC void orte_errmgr_base_error_abort(int error_code, char *fmt, ...); -ORTE_DECLSPEC int orte_errmgr_base_register_job_not_avail(orte_jobid_t job); - -ORTE_DECLSPEC void orte_errmgr_base_abort(void) __opal_attribute_noreturn__; - -ORTE_DECLSPEC int orte_errmgr_base_abort_procs_request_not_avail(orte_process_name_t *procs, orte_std_cntr_t num_procs); +ORTE_DECLSPEC int orte_errmgr_base_register_cb_not_avail(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata); /* * external API functions will be documented in the mca/errmgr/errmgr.h file */ -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/errmgr/bproc/Makefile.am b/orte/mca/errmgr/bproc/Makefile.am deleted file mode 100644 index a5aab9b7a8..0000000000 --- a/orte/mca/errmgr/bproc/Makefile.am +++ /dev/null @@ -1,48 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include $(errmgr_bproc_CPPFLAGS) - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if OMPI_BUILD_errmgr_bproc_DSO -component_noinst = -component_install = mca_errmgr_bproc.la -else -component_noinst = libmca_errmgr_bproc.la -component_install = -endif - -sources = \ - errmgr_bproc.h \ - errmgr_bproc.c \ - errmgr_bproc_component.c - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_errmgr_bproc_la_SOURCES = $(sources) -mca_errmgr_bproc_la_LIBS = $(errmgr_bproc_LIBS) -mca_errmgr_bproc_la_LDFLAGS = -module -avoid-version $(errmgr_bproc_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_errmgr_bproc_la_SOURCES = $(sources) -libmca_errmgr_bproc_la_LIBADD = $(errmgr_bproc_LIBS) -libmca_errmgr_bproc_la_LDFLAGS = -module -avoid-version $(errmgr_bproc_LDFLAGS) diff --git a/orte/mca/errmgr/bproc/configure.m4 b/orte/mca/errmgr/bproc/configure.m4 deleted file mode 100644 index 5fc4f86287..0000000000 --- a/orte/mca/errmgr/bproc/configure.m4 +++ /dev/null @@ -1,38 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_errmgr_bproc_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_errmgr_bproc_CONFIG],[ - OMPI_CHECK_BPROC([errmgr_bproc], [errmgr_bproc_good=1], - [errmgr_bproc_good=1], [errmgr_bproc_good=0]) - - # if check worked, set wrapper flags if so. 
- # Evaluate succeed / fail - AS_IF([test "$errmgr_bproc_good" = "1"], - [errmgr_bproc_WRAPPER_EXTRA_LDFLAGS="$errmgr_bproc_LDFLAGS" - errmgr_bproc_WRAPPER_EXTRA_LIBS="$errmgr_bproc_LIBS" - $1], - [$2]) - - # set build flags to use in makefile - AC_SUBST([errmgr_bproc_CPPFLAGS]) - AC_SUBST([errmgr_bproc_LDFLAGS]) - AC_SUBST([errmgr_bproc_LIBS]) -])dnl diff --git a/orte/mca/errmgr/bproc/errmgr_bproc.c b/orte/mca/errmgr/bproc/errmgr_bproc.c deleted file mode 100644 index 6856eba6c2..0000000000 --- a/orte/mca/errmgr/bproc/errmgr_bproc.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/runtime/runtime.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/errmgr/base/errmgr_private.h" -#include "orte/mca/errmgr/bproc/errmgr_bproc.h" - -/* - * This function gets called when the SMR updates a process state to - * indicate that it aborted. Since the bproc component is only active on - * non-HNP processes, this function will NEVER be called - */ -int orte_errmgr_bproc_proc_aborted(orte_gpr_notify_message_t *msg) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when the SMR updates a process state to - * indicate that it failed to start. 
Since the bproc component is only active on - * non-HNP processes, this function will NEVER be called - */ -int orte_errmgr_bproc_incomplete_start(orte_gpr_notify_message_t *msg) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when a process detects an internal error. - * Bproc is unusually bad about letting us pass information that we - * aborted as opposed to normally terminated. There is no way to locally - * monitor the process state on a remote node, so the only thing we - * can do is pass the info back to the Bproc PLS on the HNP and let it - * figure out what to do. - */ -void orte_errmgr_bproc_error_detected(int error_code, char *fmt, ...) -{ - va_list arglist; - orte_buffer_t* cmd; - uint8_t command; - int rc; - - OPAL_TRACE(1); - - /* If there was a message, output it */ - va_start(arglist, fmt); - if( NULL != fmt ) { - char* buffer = NULL; - vasprintf( &buffer, fmt, arglist ); - opal_output( 0, buffer ); - free( buffer ); - } - va_end(arglist); - - /* Now prepare and send a message to the BProc PLS so it knows that - * we abnormally terminated. It doesn't matter what is in the - * message - the fact that it gets received is adequate - */ - command = 0x01; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return; - } - - /* just pack something */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return; - } - - /* send the alert */ - if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_BPROC_ABORT, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return; - } - OBJ_RELEASE(cmd); - - /* okay, now we can truly abort. Tell the abort function not to bother writing out - * an abort file - we can't do anything with it anyway! - */ - orte_abort(error_code, false); -} - -/* - * This function gets called when a process desperately needs to just die. 
- * Nothing can be done by definition here - this function ONLY gets - * called as an absolute last resort. - */ -void orte_errmgr_bproc_abort() -{ - /* abnormal exit - no point in writing out an abort file as bproc doesn't - * know what to do with it anyway - */ - orte_abort(-1, false); -} - -/* - * Alternatively, some systems (e.g., OpenMPI) need to tell us to kill - * some other subset of processes along with us. Send that info to the - * HNP so it can kill them. - * - * NOTE: this function assumes that the underlying ORTE infrastructure is - * still operational. Use of this function should therefore be restricted - * to cases where the problem is in a higher layer (e.g., MPI) as the - * process is likely to "hang" if an ORTE problem has been encountered. - */ -int orte_errmgr_bproc_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - /* protect us against error */ - if (NULL == procs) { - return ORTE_ERR_BAD_PARAM; - } - - command = ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the number of procs we are requesting be aborted */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &nprocs, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the array of proc names */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, procs, nprocs, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - 
OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_bproc_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - -/* - * It is imperative that ONLY an HNP perform this registration! 
- */ -int orte_errmgr_bproc_register_job(orte_jobid_t job) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - command = ORTE_ERRMGR_REGISTER_JOB_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the jobid we are requesting be monitored */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_bproc_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_REGISTER_JOB_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/bproc/errmgr_bproc.h b/orte/mca/errmgr/bproc/errmgr_bproc.h deleted file mode 100644 index 2966771003..0000000000 --- 
a/orte/mca/errmgr/bproc/errmgr_bproc.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef ORTE_ERRMGR_BPROC_H -#define ORTE_ERRMGR_BPROC_H - - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" - -#include "orte/mca/errmgr/errmgr.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_errmgr_bproc_open(void); -int orte_errmgr_bproc_close(void); - - -/* - * Startup / Shutdown - */ -orte_errmgr_base_module_t* -orte_errmgr_bproc_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); - -int orte_errmgr_bproc_finalize(void); - -/* - * globals used within the component - */ -typedef struct { - int debug; - orte_process_name_t *replica; -} orte_errmgr_bproc_globals_t; - - -extern orte_errmgr_bproc_globals_t orte_errmgr_bproc_globals; - -/* - * Component API functions - */ -int orte_errmgr_bproc_proc_aborted(orte_gpr_notify_message_t *msg); - -int orte_errmgr_bproc_incomplete_start(orte_gpr_notify_message_t *msg); - -void orte_errmgr_bproc_error_detected(int error_code, char *fmt, ...) 
__opal_attribute_format__(__printf__, 2, 3); - -void orte_errmgr_bproc_abort(void) __opal_attribute_noreturn__; - -int orte_errmgr_bproc_register_job(orte_jobid_t job); - -int orte_errmgr_bproc_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/errmgr/bproc/errmgr_bproc_component.c b/orte/mca/errmgr/bproc/errmgr_bproc_component.c deleted file mode 100644 index 712c1481be..0000000000 --- a/orte/mca/errmgr/bproc/errmgr_bproc_component.c +++ /dev/null @@ -1,165 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_bproc.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_errmgr_base_component_t mca_errmgr_bproc_component = { - { - ORTE_ERRMGR_BASE_VERSION_1_3_0, - - "bproc", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_errmgr_bproc_open, /* module open */ - orte_errmgr_bproc_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_errmgr_bproc_component_init, /* module init */ - orte_errmgr_bproc_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_errmgr_base_module_t orte_errmgr_bproc = { - orte_errmgr_base_log, - orte_errmgr_bproc_proc_aborted, - orte_errmgr_bproc_incomplete_start, - orte_errmgr_bproc_error_detected, - orte_errmgr_bproc_register_job, - orte_errmgr_bproc_abort, - orte_errmgr_bproc_abort_procs_request -}; - - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* local globals */ -orte_errmgr_bproc_globals_t orte_errmgr_bproc_globals; - -/* - * Open the component - */ -int orte_errmgr_bproc_open(void) -{ - int id, tmp; - - id = mca_base_param_register_int("errmgr", "bproc", "debug", NULL, 0); - mca_base_param_lookup_int(id, 
&tmp); - if (tmp) { - orte_errmgr_bproc_globals.debug = true; - } else { - orte_errmgr_bproc_globals.debug = false; - } - - return ORTE_SUCCESS; -} - -/* - * Close the component - */ -int orte_errmgr_bproc_close(void) -{ - return ORTE_SUCCESS; -} - -orte_errmgr_base_module_t* -orte_errmgr_bproc_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - if (orte_errmgr_bproc_globals.debug) { - opal_output(0, "errmgr_bproc_init called"); - } - - /* If we are an HNP or an orted, then don't pick us! */ - if (orte_process_info.seed || orte_process_info.daemon) { - /* don't take me! */ - return NULL; - } - - /* Return a module (choose an arbitrary, positive priority -- - absolutely must be higher than the proxy component - */ - - *priority = 100; - - /* no part of OpenRTE allows or has threads */ - - *allow_multi_user_threads = false; - *have_hidden_threads = false; - - /* define the replica for us to use - for now, just point - * to the name service replica - */ - orte_errmgr_bproc_globals.replica = orte_process_info.ns_replica; - - initialized = true; - return &orte_errmgr_bproc; -} - -/* - * finalize routine - */ -int orte_errmgr_bproc_finalize(void) -{ - if (orte_errmgr_bproc_globals.debug) { - opal_output(0, "%s errmgr_bproc_finalize called", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - initialized = false; - - /* All done */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/orted/Makefile.am b/orte/mca/errmgr/default/Makefile.am similarity index 72% rename from orte/mca/errmgr/orted/Makefile.am rename to orte/mca/errmgr/default/Makefile.am index 7c06efb6f4..a32c8bbf1e 100644 --- a/orte/mca/errmgr/orted/Makefile.am +++ b/orte/mca/errmgr/default/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - errmgr_orted.h \ - errmgr_orted_component.c \ - errmgr_orted.c + errmgr_default.h \ + errmgr_default_component.c \ + errmgr_default.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) 
or libmca__.la # (for static builds). -if OMPI_BUILD_errmgr_orted_DSO +if OMPI_BUILD_errmgr_default_DSO component_noinst = -component_install = mca_errmgr_orted.la +component_install = mca_errmgr_default.la else -component_noinst = libmca_errmgr_orted.la +component_noinst = libmca_errmgr_default.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_errmgr_orted_la_SOURCES = $(sources) -mca_errmgr_orted_la_LDFLAGS = -module -avoid-version +mca_errmgr_default_la_SOURCES = $(sources) +mca_errmgr_default_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_errmgr_orted_la_SOURCES =$(sources) -libmca_errmgr_orted_la_LDFLAGS = -module -avoid-version +libmca_errmgr_default_la_SOURCES =$(sources) +libmca_errmgr_default_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/errmgr/orted/configure.params b/orte/mca/errmgr/default/configure.params similarity index 100% rename from orte/mca/errmgr/orted/configure.params rename to orte/mca/errmgr/default/configure.params diff --git a/orte/mca/errmgr/default/errmgr_default.c b/orte/mca/errmgr/default/errmgr_default.c new file mode 100644 index 0000000000..2c708b02a9 --- /dev/null +++ b/orte/mca/errmgr/default/errmgr_default.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include + +#include "opal/class/opal_list.h" +#include "opal/util/trace.h" +#include "opal/util/output.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_wakeup.h" +#include "orte/mca/plm/plm.h" +#include "orte/util/session_dir.h" +#include "orte/util/name_fns.h" + +#include "orte/mca/errmgr/base/errmgr_private.h" +#include "errmgr_default.h" + +/* + * This function gets called by the PLM when an orted notifies us + * that a process has aborted + * Various components will follow their own strategy for dealing with + * this situation. For this component, we simply kill the job. + */ +void orte_errmgr_default_proc_aborted(orte_process_name_t *name, int exit_code) +{ + int rc; + orte_job_t **jobs; + orte_std_cntr_t i; + + OPAL_TRACE(1); + + /* if we are already in progress, then ignore this call */ + if (orte_abort_in_progress) { + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output, + "%s errmgr:default: abort in progress, ignoring proc %s aborted with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(name), exit_code)); + + return; + } + + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output, + "%s errmgr:default: proc %s aborting with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(name), exit_code)); + + /* flag that we are aborting */ + orte_abort_in_progress = true; + + /* indicate that all jobs other than the one containing this + * proc have been orted to abort - this is necessary to avoid + * duplicate ordering of "abort". + * + * NOTE: be sure to not include the 0 job data location as this + * contains the daemons! 
+ */ + jobs = (orte_job_t**)orte_job_data->addr; + for (i=1; i < orte_job_data->size; i++) { + /* the array is left justfied, so we can quit once + * we see a NULL + */ + if (NULL == jobs[i]) { + break; + } + if (ORTE_JOB_STATE_ABORTED != jobs[i]->state && + ORTE_JOB_STATE_ABORTED_BY_SIG != jobs[i]->state) { + jobs[i]->state = ORTE_JOB_STATE_ABORT_ORDERED; + } + } + + /* tell the plm to terminate all jobs */ + if (ORTE_SUCCESS != (rc = orte_plm.terminate_job(ORTE_JOBID_WILDCARD))) { + ORTE_ERROR_LOG(rc); + } + + /* wakeup orterun so we can exit */ + if (ORTE_SUCCESS != (rc = orte_wakeup(exit_code))) { + ORTE_ERROR_LOG(rc); + } +} + +/* + * This function gets called by the PLM when an orted notifies us that + * a job failed to start. + * Various components will follow their own strategy for dealing with + * this situation. For this component, we simply kill the job. + */ +void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code) +{ + int rc; + + OPAL_TRACE(1); + + /* if we are already in progress, then ignore this call */ + if (orte_abort_in_progress) { + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output, + "%s errmgr:default: abort in progress, ignoring incomplete start on job %s with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(job), exit_code)); + + return; + } + + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output, + "%s errmgr:default: job %s reported incomplete start with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(job), exit_code)); + + /* flag that we are aborting */ + orte_abort_in_progress = true; + + /* tell the plm to terminate all job */ + if (ORTE_SUCCESS != (rc = orte_plm.terminate_job(ORTE_JOBID_WILDCARD))) { + ORTE_ERROR_LOG(rc); + } + + /* wakeup orterun so we can exit */ + if (ORTE_SUCCESS != (rc = orte_wakeup(exit_code))) { + ORTE_ERROR_LOG(rc); + } +} + +/* + * Register a callback function upon a change to a specified job state. 
+ */ +int orte_errmgr_default_register_callback(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata) +{ + return ORTE_ERR_NOT_IMPLEMENTED; +} diff --git a/orte/mca/errmgr/hnp/errmgr_hnp.h b/orte/mca/errmgr/default/errmgr_default.h similarity index 50% rename from orte/mca/errmgr/hnp/errmgr_hnp.h rename to orte/mca/errmgr/default/errmgr_default.h index 8dc79d13e4..7338199698 100644 --- a/orte/mca/errmgr/hnp/errmgr_hnp.h +++ b/orte/mca/errmgr/default/errmgr_default.h @@ -22,60 +22,43 @@ #include "orte_config.h" -#include "orte/orte_types.h" +#include "orte/types.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/mca/plm/plm_types.h" #include "orte/mca/errmgr/errmgr.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /* * Module open / close */ -int orte_errmgr_hnp_open(void); -int orte_errmgr_hnp_close(void); +int orte_errmgr_default_open(void); +int orte_errmgr_default_close(void); /* * Startup / Shutdown */ orte_errmgr_base_module_t* -orte_errmgr_hnp_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); +orte_errmgr_default_component_init(int *priority); -int orte_errmgr_hnp_finalize(void); - -/* - * globals used within the component - */ -typedef struct { - int debug; -} orte_errmgr_hnp_globals_t; - - -extern orte_errmgr_hnp_globals_t orte_errmgr_hnp_globals; +int orte_errmgr_default_finalize(void); /* * Component API functions */ -int orte_errmgr_hnp_proc_aborted(orte_gpr_notify_message_t *msg); +void orte_errmgr_default_proc_aborted(orte_process_name_t *name, int exit_code); -int orte_errmgr_hnp_incomplete_start(orte_gpr_notify_message_t *msg); +void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code); -void orte_errmgr_hnp_error_detected(int error_code, char *fmt, ...) 
__opal_attribute_format__(__printf__, 2, 3); +int orte_errmgr_default_register_callback(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata); -void orte_errmgr_hnp_abort(void) __opal_attribute_noreturn__; +ORTE_MODULE_DECLSPEC extern mca_errmgr_base_component_t mca_errmgr_default_component; -int orte_errmgr_hnp_register_job(orte_jobid_t job); - -int orte_errmgr_hnp_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs); - -ORTE_MODULE_DECLSPEC extern mca_errmgr_base_component_t mca_errmgr_hnp_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/errmgr/default/errmgr_default_component.c b/orte/mca/errmgr/default/errmgr_default_component.c new file mode 100644 index 0000000000..51fbaf940a --- /dev/null +++ b/orte/mca/errmgr/default/errmgr_default_component.c @@ -0,0 +1,121 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + * The Open MPI General Purpose Registry - Proxy component + * + */ + +/* + * includes + */ +#include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" + +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/rml/rml.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/errmgr/base/errmgr_private.h" + +#include "errmgr_default.h" + + +/* + * Struct of function pointers that need to be initialized + */ +mca_errmgr_base_component_t mca_errmgr_default_component = { + { + ORTE_ERRMGR_BASE_VERSION_1_3_0, + + "default", /* MCA module name */ + ORTE_MAJOR_VERSION, /* MCA module major version */ + ORTE_MINOR_VERSION, /* MCA module minor version */ + ORTE_RELEASE_VERSION, /* MCA module release version */ + orte_errmgr_default_open, /* module open */ + orte_errmgr_default_close /* module close */ + }, + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + orte_errmgr_default_component_init, /* module init */ + orte_errmgr_default_finalize /* module shutdown */ +}; + +/* + * setup the function pointers for the module + */ +orte_errmgr_base_module_t orte_errmgr_default = { + orte_errmgr_default_proc_aborted, + orte_errmgr_default_incomplete_start, + orte_errmgr_default_register_callback, + orte_errmgr_base_error_abort +}; + + +/* + * Open the component + */ +int orte_errmgr_default_open(void) +{ + return ORTE_SUCCESS; +} + +/* + * Close the component + */ +int orte_errmgr_default_close(void) +{ + return ORTE_SUCCESS; +} + +orte_errmgr_base_module_t* +orte_errmgr_default_component_init(int *priority) +{ + /* If we are not an HNP, then don't pick us! */ + if (!orte_process_info.hnp) { + /* don't take me! 
*/ + return NULL; + } + + /* Return a module (choose an arbitrary, positive priority -- + it's only relevant compared to other components). */ + + *priority = 10; + + return &orte_errmgr_default; +} + +/* + * finalize routine + */ +int orte_errmgr_default_finalize(void) +{ + /* All done */ + return ORTE_SUCCESS; +} diff --git a/orte/mca/errmgr/errmgr.h b/orte/mca/errmgr/errmgr.h index a8efae07db..551e95ae88 100644 --- a/orte/mca/errmgr/errmgr.h +++ b/orte/mca/errmgr/errmgr.h @@ -29,18 +29,15 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" - - -#include "orte/mca/schema/schema.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/constants.h" +#include "orte/types.h" #include "opal/mca/mca.h" +#include "opal/util/error.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +#include "orte/mca/plm/plm_types.h" + +BEGIN_C_DECLS /* * Macro definitions @@ -51,41 +48,40 @@ extern "C" { */ #define ORTE_ERROR_NAME(n) opal_strerror(n) - #define ORTE_ERROR_LOG(n) \ - orte_errmgr.log((n), __FILE__, __LINE__) + orte_errmgr_base_log(n, __FILE__, __LINE__) + +/** + * This is not part of any + * module so it can be used at any time! + */ +ORTE_DECLSPEC extern void orte_errmgr_base_log(int error_code, char *filename, int line); + /* * Component functions - all MUST be provided! */ -/** - * Log an error - * Log an error that occurred in the runtime environment - * - * @code - * orte_errmgr.log("this is an error", __FILE__, __LINE__); - * @endcode - */ -typedef void (*orte_errmgr_base_module_log_fn_t)(int error_code, char *filename, int line); - - /** * Alert - process aborted - * This function is called when a remote process aborts during execution. The function - * is called via the GPR's trigger notification system. Actions taken in response - * to the abnormal termination of a remote application process will vary across + * This function is called by the PLM when a remote process aborts during execution. 
Actions taken + * in response to the abnormal termination of a remote application process will vary across * the various errmgr components. - + * * NOTE: Local process errors should always be reported through the error_detected interface and * NOT here. + * + * @param *name Pointer to the name of the proc that aborted + * + * @param exit_code Exit code passed in by the caller along with the name (TODO: confirm exact semantics against the PLM) + * @note Declared void - no status is returned to the caller */ -typedef int (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_gpr_notify_message_t *msg); +typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *name, int exit_code); /** * Alert - incomplete start of a job - * This function is called when an attempted launch of a job encounters failure of + * This function is called by the PLM when an attempted launch of a job encounters failure of * one or more processes to start. The strategy for dealing * with this "incomplete start" situation varies across the various errmgr components. * @@ -97,28 +93,16 @@ typedef int (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_gpr_notify_message * NOTE: Errmgr components on non-HNP and non-daemon processes are expressly forbidden * from taking any action to this function call. Instead, they are restricted to simply * returning. + * + * @param job Job that failed to start + * + * @param exit_code Exit code passed in by the caller along with the job (TODO: confirm exact semantics against the PLM) + * @note Declared void - no status is returned to the caller */ -typedef int (*orte_errmgr_base_module_incomplete_start_fn_t)(orte_gpr_notify_message_t *msg); +typedef void (*orte_errmgr_base_module_incomplete_start_fn_t)(orte_jobid_t job, int exit_code); -/** - * Alert - internal error detected - * This function is called when an internal error is detected within a local process. - * It decides what to do about the error. In the case of application processes, it simply - * orders the local process to finalize and terminate. 
The abnormal termination will be - * detected and dealt with by the daemon/HNP system. - * - * HNPs, of course, cannot simply exit - they must first cleanup their running jobs if at - * all possible. In some cases, this cannot be done - e.g., if the error detected would - * prevent operation of the registry or has corrupted memory. In these extreme cases, - * nothing can really be done. - * - * Likewise, orteds have responsibility towards their local application processes and - * must make some attempt to clean them up before exiting. - * - * The function pretty prints an error message if possible. Error message should be - * specified using the standard \code printf() format. - */ -typedef void (*orte_errmgr_base_module_error_detected_fn_t)(int error_code, char *fmt, ...); +/* error manager callback function */ +typedef void (*orte_errmgr_cb_fn_t)(orte_jobid_t job, orte_job_state_t state, void *cbdata); /* * Register a job with the error manager @@ -136,43 +120,28 @@ typedef void (*orte_errmgr_base_module_error_detected_fn_t)(int error_code, char * NOTE: ONLY HNPs are allowed to register for trigger reports. All other components * MUST do nothing but return ORTE_SUCCESS. */ -typedef int (*orte_errmgr_base_module_register_job_fn_t)(orte_jobid_t job); +typedef int (*orte_errmgr_base_module_register_cb_fn_t)(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata); /** * Alert - self aborting - * This function is called when a process is aborting. It will finalize the process - * itself, and then exits - it takes no other actions. The intent here is to provide + * This function is called when a process is aborting due to some internal error. + * It will finalize the process + * itself, and then exit - it takes no other actions. The intent here is to provide * a last-ditch exit procedure that attempts to clean up a little. 
*/ -typedef void (*orte_errmgr_base_module_abort_fn_t)(void) __opal_attribute_noreturn__; +typedef void (*orte_errmgr_base_module_abort_fn_t)(int error_code, char *fmt, ...) __opal_attribute_format__(__printf__, 2, 3); -/* - * Request that the system abort processes other than myself - * The possibility exists that a process will decide that ONLY a small subset of a job - * must be aborted. This function allows a process to request that the identified - * processes be aborted. The "request" portion of the function's name is not - * by accident - this function specifically does NOT perform the abort process - * itself, but simply requests that it be done. - * - * NOTE: Please ensure that you do NOT include your own process name in the - * array or else you will be ordered to "die" before you complete this function - * (i.e., you will be held in a blocking receive pending an answer from the - * HNP, which won't come before you receive your own "die" command). If you need - * to die too, then call "abort" after completing this function call. 
- */ -typedef int (*orte_errmgr_base_module_abort_procs_request_fn_t)(orte_process_name_t *procs, orte_std_cntr_t num_procs); - /* * Ver 1.0.0 */ struct orte_errmgr_base_module_1_3_0_t { - orte_errmgr_base_module_log_fn_t log; orte_errmgr_base_module_proc_aborted_fn_t proc_aborted; orte_errmgr_base_module_incomplete_start_fn_t incomplete_start; - orte_errmgr_base_module_error_detected_fn_t error_detected; - orte_errmgr_base_module_register_job_fn_t register_job; + orte_errmgr_base_module_register_cb_fn_t register_callback; orte_errmgr_base_module_abort_fn_t abort; - orte_errmgr_base_module_abort_procs_request_fn_t abort_procs_request; }; typedef struct orte_errmgr_base_module_1_3_0_t orte_errmgr_base_module_1_3_0_t; @@ -182,10 +151,7 @@ typedef orte_errmgr_base_module_1_3_0_t orte_errmgr_base_module_t; * ERRMGR Component */ -typedef orte_errmgr_base_module_t* (*orte_errmgr_base_component_init_fn_t)( - bool *allow_multi_user_threads, - bool *have_hidden_threads, - int *priority); +typedef orte_errmgr_base_module_t* (*orte_errmgr_base_component_init_fn_t)(int *priority); typedef int (*orte_errmgr_base_component_finalize_fn_t)(void); @@ -218,8 +184,6 @@ typedef mca_errmgr_base_component_1_3_0_t mca_errmgr_base_component_t; */ ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr; /* holds selected module's function pointers */ -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/errmgr/hnp/errmgr_hnp.c b/orte/mca/errmgr/hnp/errmgr_hnp.c deleted file mode 100644 index f7844894e3..0000000000 --- a/orte/mca/errmgr/hnp/errmgr_hnp.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include -#include - -#include "opal/class/opal_list.h" -#include "opal/util/trace.h" -#include "opal/util/output.h" - -#include "orte/runtime/runtime.h" -#include "orte/runtime/params.h" -#include "orte/runtime/orte_wakeup.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/schema/schema.h" -#include "orte/dss/dss.h" -#include "orte/mca/rmgr/rmgr.h" - -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/errmgr/hnp/errmgr_hnp.h" - -/* - * This function gets called when the someone updates a process - * state to indicate it has aborted. That action results in - * the firing of a registry trigger that passes a minimal - * data message here. The only part of that message we need - * is the segment name so we can extract the jobid from it - * - * Various components will follow their own strategy for dealing with - * this situation. For this component, we simply kill the job. 
- */ -int orte_errmgr_hnp_proc_aborted(orte_gpr_notify_message_t *msg) -{ - orte_jobid_t job; - opal_list_t attrs; - opal_list_item_t *item; - int rc; - - OPAL_TRACE(1); - - opal_output(orte_errmgr_base_output, "errmgr:hnp: proc abort has been detected"); - - /* This trigger is named, so we can extract the jobid - * directly from the trigger name - */ - if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* set the job state */ - if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_ABORTED))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* tell the pls to terminate the ENTIRE FAMLIY of this job - this is necessary to avoid - * "hanging" portions of the application if the aborted job was dynamically spawned - * from another job - */ - OBJ_CONSTRUCT(&attrs, opal_list_t); - orte_rmgr.add_attribute(&attrs, ORTE_NS_USE_JOB_FAMILY, ORTE_UNDEF, NULL, ORTE_RMGR_ATTR_OVERRIDE); - if (ORTE_SUCCESS != (rc = orte_pls.terminate_job(job, &orte_abort_timeout, &attrs))) { - ORTE_ERROR_LOG(rc); - } - while (NULL != (item = opal_list_remove_first(&attrs))) OBJ_RELEASE(item); - OBJ_DESTRUCT(&attrs); - - /* orterun will only wakeup when all procs IN THE ROOT JOB report terminated. The terminate_job - * function *should* have done that - however, it is possible during abnormal - * startup that it will fail to happen. If we get here, we force the issue by - * deliberately causing the TERMINATE trigger to fire - */ - if (ORTE_SUCCESS != (rc = orte_wakeup(job))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * This function gets called when someone updates a process - * state to indicate it failed to start. That action results in - * the firing of a registry trigger that passes a minimal - * data message here. 
The only part of that message we need - * is the segment name so we can extract the jobid from it - * - * Various components will follow their own strategy for dealing with - * this situation. For this component, we simply kill the job. - */ -int orte_errmgr_hnp_incomplete_start(orte_gpr_notify_message_t *msg) -{ - orte_jobid_t job; - opal_list_t attrs; - opal_list_item_t *item; - int rc; - - OPAL_TRACE(1); - - /* This trigger is named, so we can extract the jobid - * directly from the trigger name - */ - if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - opal_output(orte_errmgr_base_output, "errmgr_hnp: incomplete start reported - job %lu", (unsigned long)job); - - /* set the job state */ - if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_FAILED_TO_START))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* tell the pls to terminate the job - kill this job and all members of its family - * as we have no way to handle it otherwise at this time - */ - OBJ_CONSTRUCT(&attrs, opal_list_t); - orte_rmgr.add_attribute(&attrs, ORTE_NS_USE_JOB_FAMILY, ORTE_UNDEF, NULL, ORTE_RMGR_ATTR_OVERRIDE); - if (ORTE_SUCCESS != (rc = orte_pls.terminate_job(job, &orte_abort_timeout, &attrs))) { - ORTE_ERROR_LOG(rc); - } - while (NULL != (item = opal_list_remove_first(&attrs))) OBJ_RELEASE(item); - OBJ_DESTRUCT(&attrs); - - /* orterun will only wakeup when all procs IN THE ROOT JOB report terminated. The terminate_job - * function *should* have done that - however, it is possible during abnormal - * startup that it will fail to happen. If we get here, we force the issue by - * deliberately causing the TERMINATE trigger to fire - */ - if (ORTE_SUCCESS != (rc = orte_wakeup(job))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * This function gets called when the HNP itself detects an internal error! 
- * Ideally, we would find some way to tell all the active jobs to die before - * we depart ourselves. Unfortunately, at this time, we aren't sure we can do - * this - later, we'll add some more intelligence by, for example, checking - * the error code to see if it's something that would allow us to alert - * the remote orteds. - * - * For now, we'll just depart! - */ -void orte_errmgr_hnp_error_detected(int error_code, char *fmt, ...) -{ - va_list arglist; - - /* If there was a message, output it */ - - va_start(arglist, fmt); - if( NULL != fmt ) { - char* buffer = NULL; - vasprintf( &buffer, fmt, arglist ); - opal_output( 0, buffer ); - free( buffer ); - } - va_end(arglist); - - /* abnormal exit */ - orte_abort(error_code, false); -} - -/* - * This function gets called when the HNP desperately needs to just die. - * Nothing can be done by definition here - this function ONLY gets - * called as an absolute last resort - */ -void orte_errmgr_hnp_abort(void) -{ - OPAL_TRACE(1); - - /* abnormal exit */ - orte_abort(-1, false); -} - -/* - * This function gets called when a process wants to request that the HNP - * abort some set of processes for it. Since this component IS for the HNP, - * that means we need to actually execute this request! Call upon the PLS - * as needed to execute the abort requests - */ -int orte_errmgr_hnp_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs) -{ - int rc; - - OPAL_TRACE(1); - - rc = ORTE_SUCCESS; - return rc; -} - -/* - * Register the HNP's errmgr functions to be called when the job encounters - * certain pre-identified problem states. - * - * NOTE: It is imperative that ONLY the HNP perform this registration! 
- */ -int orte_errmgr_hnp_register_job(orte_jobid_t job) -{ - /* we need to setup two counters and their corresponding triggers - one - * to alert us when something fails to launch, and another for when - * someone aborts - */ - int rc; - - OPAL_TRACE(1); - - /* define the ABORT trigger to fire when any process aborts */ - if (ORTE_SUCCESS != (rc = orte_smr.define_alert_monitor(job, ORTE_NUM_ABORTED_TRIGGER, - ORTE_PROC_NUM_ABORTED, 0, 1, true, - orte_errmgr_hnp_proc_aborted, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* define the FAILED_LAUNCH trigger to fire when the launch fails */ - if (ORTE_SUCCESS != (rc = orte_smr.define_alert_monitor(job, ORTE_FAILED_TO_START_TRIGGER, - ORTE_PROC_NUM_FAILED_START, 0, 1, true, - orte_errmgr_hnp_incomplete_start, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/hnp/errmgr_hnp_component.c b/orte/mca/errmgr/hnp/errmgr_hnp_component.c deleted file mode 100644 index 19772a995d..0000000000 --- a/orte/mca/errmgr/hnp/errmgr_hnp_component.c +++ /dev/null @@ -1,175 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_hnp.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_errmgr_base_component_t mca_errmgr_hnp_component = { - { - ORTE_ERRMGR_BASE_VERSION_1_3_0, - - "hnp", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_errmgr_hnp_open, /* module open */ - orte_errmgr_hnp_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_errmgr_hnp_component_init, /* module init */ - orte_errmgr_hnp_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_errmgr_base_module_t orte_errmgr_hnp = { - orte_errmgr_base_log, - orte_errmgr_hnp_proc_aborted, - orte_errmgr_hnp_incomplete_start, - orte_errmgr_hnp_error_detected, - orte_errmgr_hnp_register_job, - orte_errmgr_hnp_abort, - orte_errmgr_hnp_abort_procs_request -}; - - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* local globals */ -orte_errmgr_hnp_globals_t orte_errmgr_hnp_globals; - - -/* - * Open the component - */ -int orte_errmgr_hnp_open(void) -{ - int id, tmp; - - id = mca_base_param_register_int("errmgr", "hnp", "debug", NULL, 0); - mca_base_param_lookup_int(id, &tmp); - if (tmp) { - 
orte_errmgr_hnp_globals.debug = true; - } else { - orte_errmgr_hnp_globals.debug = false; - } - - return ORTE_SUCCESS; -} - -/* - * Close the component - */ -int orte_errmgr_hnp_close(void) -{ - return ORTE_SUCCESS; -} - -orte_errmgr_base_module_t* -orte_errmgr_hnp_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - int rc; - - if (orte_errmgr_hnp_globals.debug) { - opal_output(0, "errmgr_hnp_init called"); - } - - /* If we are not an HNP, then don't pick us! */ - if (!orte_process_info.seed) { - /* don't take me! */ - return NULL; - } - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other components). */ - - *priority = 10; - - /* no part of OpenRTE allows or has threads */ - - *allow_multi_user_threads = false; - *have_hidden_threads = false; - - /* start the receive function */ - if (ORTE_SUCCESS != (rc = orte_errmgr_base_comm_start())) { - ORTE_ERROR_LOG(rc); - return NULL; - } - - initialized = true; - return &orte_errmgr_hnp; -} - -/* - * finalize routine - */ -int orte_errmgr_hnp_finalize(void) -{ - int rc; - - if (orte_errmgr_hnp_globals.debug) { - opal_output(0, "%s errmgr_hnp_finalize called", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - /* stop the receive function */ - if (ORTE_SUCCESS != (rc = orte_errmgr_base_comm_stop())) { - ORTE_ERROR_LOG(rc); - } - - initialized = false; - - /* All done */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/orted/errmgr_orted.c b/orte/mca/errmgr/orted/errmgr_orted.c deleted file mode 100644 index bba38a6d8c..0000000000 --- a/orte/mca/errmgr/orted/errmgr_orted.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include -#include - -#include "opal/util/output.h" - -#include "orte/runtime/runtime.h" -#include "orte/runtime/orte_wait.h" -#include "orte/util/proc_info.h" -#include "orte/util/session_dir.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/errmgr/base/errmgr_private.h" -#include "orte/mca/errmgr/orted/errmgr_orted.h" - -/* - * This function only gets called on HNP components! Orteds learn about - * a proc aborting from the HNP. - */ -int orte_errmgr_orted_proc_aborted(orte_gpr_notify_message_t *msg) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -/* This function only gets called on HNP components! Orteds learn about - * an incomplete start from the HNP. - */ -int orte_errmgr_orted_incomplete_start(orte_gpr_notify_message_t *msg) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when the orted itself detects an internal error! - * At some point in future, to be polite, we tell any of our own local - * processes to die before we abandon them - */ -void orte_errmgr_orted_error_detected(int error_code, char *fmt, ...) -{ - va_list arglist; - - /* If there was a message, output it */ - - va_start(arglist, fmt); - if( NULL != fmt ) { - char* buffer = NULL; - vasprintf( &buffer, fmt, arglist ); - opal_output( 0, buffer ); - free( buffer ); - } - va_end(arglist); - - /* cleanup my session directory */ - orte_session_dir_finalize(orte_process_info.my_name); - - /* abnormal exit */ - orte_abort(error_code, false); -} - -/* - * This function gets called when we desperately need to just die. 
- * Nothing can be done by definition here - this function ONLY gets - * called as an absolute last resort - */ -void orte_errmgr_orted_abort(void) -{ - /* cleanup my session directory */ - orte_session_dir_finalize(orte_process_info.my_name); - - /* abnormal exit */ - orte_abort(-1, false); -} - -/* - * This function is called by the orted to request that some set of processes - * be aborted by the HNP. This would likely be an unusual request as the orted - * would have no knowledge of other processes or real reason to order them killed. - * Still, the capability is provided here. - */ -int orte_errmgr_orted_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - /* protect us against error */ - if (NULL == procs) { - return ORTE_ERR_BAD_PARAM; - } - - command = ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the number of procs we are requesting be aborted */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &nprocs, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the array of proc names */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, procs, nprocs, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_orted_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - 
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_orted_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - -/* - * It is imperative that ONLY an HNP perform this registration! - */ -int orte_errmgr_orted_register_job(orte_jobid_t job) -{ - return ORTE_ERR_NOT_AVAILABLE; -} diff --git a/orte/mca/errmgr/orted/errmgr_orted.h b/orte/mca/errmgr/orted/errmgr_orted.h deleted file mode 100644 index a8612c6035..0000000000 --- a/orte/mca/errmgr/orted/errmgr_orted.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef ORTE_ERRMGR_ORTED_H -#define ORTE_ERRMGR_ORTED_H - - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" - -#include "orte/mca/errmgr/errmgr.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_errmgr_orted_open(void); -int orte_errmgr_orted_close(void); - - -/* - * Startup / Shutdown - */ -orte_errmgr_base_module_t* -orte_errmgr_orted_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); - -int orte_errmgr_orted_finalize(void); - -/* - * globals used within the component - */ -typedef struct { - int debug; - orte_process_name_t *replica; -} orte_errmgr_orted_globals_t; - - -extern orte_errmgr_orted_globals_t orte_errmgr_orted_globals; - -/* - * Component API functions - */ -int orte_errmgr_orted_proc_aborted(orte_gpr_notify_message_t *msg); - -int orte_errmgr_orted_incomplete_start(orte_gpr_notify_message_t *msg); - -void orte_errmgr_orted_error_detected(int error_code, char *fmt, ...) __opal_attribute_format__(__printf__, 2, 3); - -void orte_errmgr_orted_abort(void) __opal_attribute_noreturn__; - -int orte_errmgr_orted_register_job(orte_jobid_t job); - -int orte_errmgr_orted_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs); - -ORTE_MODULE_DECLSPEC extern mca_errmgr_base_component_t mca_errmgr_orted_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/errmgr/orted/errmgr_orted_component.c b/orte/mca/errmgr/orted/errmgr_orted_component.c deleted file mode 100644 index f240ea2740..0000000000 --- a/orte/mca/errmgr/orted/errmgr_orted_component.c +++ /dev/null @@ -1,165 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_orted.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_errmgr_base_component_t mca_errmgr_orted_component = { - { - ORTE_ERRMGR_BASE_VERSION_1_3_0, - - "orted", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_errmgr_orted_open, /* module open */ - orte_errmgr_orted_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_errmgr_orted_component_init, /* module init */ - orte_errmgr_orted_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_errmgr_base_module_t orte_errmgr_orted = { - orte_errmgr_base_log, - orte_errmgr_orted_proc_aborted, - orte_errmgr_orted_incomplete_start, - orte_errmgr_orted_error_detected, - orte_errmgr_orted_register_job, - orte_errmgr_orted_abort, - orte_errmgr_orted_abort_procs_request -}; - - 
-/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* local globals */ -orte_errmgr_orted_globals_t orte_errmgr_orted_globals; - - -/* - * Open the component - */ -int orte_errmgr_orted_open(void) -{ - int id, tmp; - - id = mca_base_param_register_int("errmgr", "orted", "debug", NULL, 0); - mca_base_param_lookup_int(id, &tmp); - if (tmp) { - orte_errmgr_orted_globals.debug = true; - } else { - orte_errmgr_orted_globals.debug = false; - } - - return ORTE_SUCCESS; -} - -/* - * Close the component - */ -int orte_errmgr_orted_close(void) -{ - return ORTE_SUCCESS; -} - -orte_errmgr_base_module_t* -orte_errmgr_orted_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - if (orte_errmgr_orted_globals.debug) { - opal_output(0, "errmgr_orted_init called"); - } - - /* If we are not a daemon, then this component is not for us! */ - if (!orte_process_info.daemon) { - /* don't take me! */ - return NULL; - } - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other components). 
*/ - - *priority = 10; - - /* no part of OpenRTE allows or has threads */ - - *allow_multi_user_threads = false; - *have_hidden_threads = false; - - /* define the HNP we should be talking to - for now, - * just use the NS replica - */ - orte_errmgr_orted_globals.replica = orte_process_info.ns_replica; - - initialized = true; - return &orte_errmgr_orted; -} - -/* - * finalize routine - */ -int orte_errmgr_orted_finalize(void) -{ - if (orte_errmgr_orted_globals.debug) { - opal_output(0, "%s errmgr_orted_finalize called", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - initialized = false; - - /* All done */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/proxy/errmgr_proxy.c b/orte/mca/errmgr/proxy/errmgr_proxy.c deleted file mode 100644 index e45daf23e1..0000000000 --- a/orte/mca/errmgr/proxy/errmgr_proxy.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/runtime/runtime.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/errmgr/base/errmgr_private.h" -#include "orte/mca/errmgr/proxy/errmgr_proxy.h" - -/* - * This function gets called when the SMR updates a process state to - * indicate that it aborted. 
Since the proxy component is only active on - * non-HNP processes, this function will NEVER be called - */ -int orte_errmgr_proxy_proc_aborted(orte_gpr_notify_message_t *msg) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when the SMR updates a process state to - * indicate that it failed to start. Since the proxy component is only active on - * non-HNP processes, this function will NEVER be called - */ -int orte_errmgr_proxy_incomplete_start(orte_gpr_notify_message_t *msg) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when a process detects an internal error. - * Various non-HNP/non-orted errmgr components will deal with this in various - * ways - for now, we simply abort and provide the error_code as our - * exit status - */ -void orte_errmgr_proxy_error_detected(int error_code, char *fmt, ...) -{ - va_list arglist; - - OPAL_TRACE(1); - - /* If there was a message, output it */ - - va_start(arglist, fmt); - if( NULL != fmt ) { - char* buffer = NULL; - vasprintf( &buffer, fmt, arglist ); - opal_output( 0, buffer ); - free( buffer ); - } - va_end(arglist); - - orte_abort(error_code, true); -} - -/* - * This function gets called when a process desperately needs to just die. - * Nothing can be done by definition here - this function ONLY gets - * called as an absolute last resort. - */ -void orte_errmgr_proxy_abort() -{ - /* abnormal exit */ - orte_abort(-1, true); -} - -/* - * Alternatively, some systems (e.g., OpenMPI) need to tell us to kill - * some other subset of processes along with us. Send that info to the - * HNP so it can kill them. - * - * NOTE: this function assumes that the underlying ORTE infrastructure is - * still operational. Use of this function should therefore be restricted - * to cases where the problem is in a higher layer (e.g., MPI) as the - * process is likely to "hang" if an ORTE problem has been encountered. 
- */ -int orte_errmgr_proxy_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - /* protect us against error */ - if (NULL == procs) { - return ORTE_ERR_BAD_PARAM; - } - - command = ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the number of procs we are requesting be aborted */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &nprocs, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the array of proc names */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, procs, nprocs, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_proxy_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD != 
command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - -/* - * It is imperative that ONLY an HNP perform this registration! - */ -int orte_errmgr_proxy_register_job(orte_jobid_t job) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - command = ORTE_ERRMGR_REGISTER_JOB_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the jobid we are requesting be monitored */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_proxy_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_REGISTER_JOB_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); 
- return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/proxy/errmgr_proxy.h b/orte/mca/errmgr/proxy/errmgr_proxy.h deleted file mode 100644 index 7d5ed8e4fe..0000000000 --- a/orte/mca/errmgr/proxy/errmgr_proxy.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef ORTE_ERRMGR_PROXY_H -#define ORTE_ERRMGR_PROXY_H - - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" - -#include "orte/mca/errmgr/errmgr.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_errmgr_proxy_open(void); -int orte_errmgr_proxy_close(void); - - -/* - * Startup / Shutdown - */ -orte_errmgr_base_module_t* -orte_errmgr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); - -int orte_errmgr_proxy_finalize(void); - -/* - * globals used within the component - */ -typedef struct { - int debug; - orte_process_name_t *replica; -} orte_errmgr_proxy_globals_t; - - -extern orte_errmgr_proxy_globals_t orte_errmgr_proxy_globals; - -/* - * Component API functions - */ -int orte_errmgr_proxy_proc_aborted(orte_gpr_notify_message_t *msg); - -int orte_errmgr_proxy_incomplete_start(orte_gpr_notify_message_t *msg); - -void 
orte_errmgr_proxy_error_detected(int error_code, char *fmt, ...) __opal_attribute_format__(__printf__, 2, 3); - -void orte_errmgr_proxy_abort(void) __opal_attribute_noreturn__; - -int orte_errmgr_proxy_register_job(orte_jobid_t job); - -int orte_errmgr_proxy_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs); - -ORTE_MODULE_DECLSPEC extern mca_errmgr_base_component_t mca_errmgr_proxy_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/errmgr/proxy/errmgr_proxy_component.c b/orte/mca/errmgr/proxy/errmgr_proxy_component.c deleted file mode 100644 index 11953f9dd4..0000000000 --- a/orte/mca/errmgr/proxy/errmgr_proxy_component.c +++ /dev/null @@ -1,164 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_proxy.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_errmgr_base_component_t mca_errmgr_proxy_component = { - { - ORTE_ERRMGR_BASE_VERSION_1_3_0, - - "proxy", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_errmgr_proxy_open, /* module open */ - orte_errmgr_proxy_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_errmgr_proxy_component_init, /* module init */ - orte_errmgr_proxy_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_errmgr_base_module_t orte_errmgr_proxy = { - orte_errmgr_base_log, - orte_errmgr_proxy_proc_aborted, - orte_errmgr_proxy_incomplete_start, - orte_errmgr_proxy_error_detected, - orte_errmgr_proxy_register_job, - orte_errmgr_proxy_abort, - orte_errmgr_proxy_abort_procs_request -}; - - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* local globals */ -orte_errmgr_proxy_globals_t orte_errmgr_proxy_globals; - -/* - * Open the component - */ -int orte_errmgr_proxy_open(void) -{ - int id, tmp; - - id = mca_base_param_register_int("errmgr", "proxy", "debug", NULL, 0); - mca_base_param_lookup_int(id, 
&tmp); - if (tmp) { - orte_errmgr_proxy_globals.debug = true; - } else { - orte_errmgr_proxy_globals.debug = false; - } - - return ORTE_SUCCESS; -} - -/* - * Close the component - */ -int orte_errmgr_proxy_close(void) -{ - return ORTE_SUCCESS; -} - -orte_errmgr_base_module_t* -orte_errmgr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - if (orte_errmgr_proxy_globals.debug) { - opal_output(0, "errmgr_proxy_init called"); - } - - /* If we are an HNP or an orted, then don't pick us! */ - if (orte_process_info.seed || orte_process_info.daemon) { - /* don't take me! */ - return NULL; - } - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other components). */ - - *priority = 10; - - /* no part of OpenRTE allows or has threads */ - - *allow_multi_user_threads = false; - *have_hidden_threads = false; - - /* define the replica for us to use - for now, just point - * to the name service replica - */ - orte_errmgr_proxy_globals.replica = orte_process_info.ns_replica; - - initialized = true; - return &orte_errmgr_proxy; -} - -/* - * finalize routine - */ -int orte_errmgr_proxy_finalize(void) -{ - if (orte_errmgr_proxy_globals.debug) { - opal_output(0, "%s errmgr_proxy_finalize called", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - initialized = false; - - /* All done */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sds/Makefile.am b/orte/mca/ess/Makefile.am similarity index 86% rename from orte/mca/sds/Makefile.am rename to orte/mca/ess/Makefile.am index 3b481a496d..7a251654bf 100644 --- a/orte/mca/sds/Makefile.am +++ b/orte/mca/ess/Makefile.am @@ -19,21 +19,21 @@ AM_CPPFLAGS = $(LTDLINCL) # main library setup -noinst_LTLIBRARIES = libmca_sds.la -libmca_sds_la_SOURCES = +noinst_LTLIBRARIES = libmca_ess.la +libmca_ess_la_SOURCES = # header setup nobase_orte_HEADERS = dist_pkgdata_DATA = # local files -headers = sds.h -libmca_sds_la_SOURCES += $(headers) +headers = 
ess.h +libmca_ess_la_SOURCES += $(headers) # Conditionally install the header files if WANT_INSTALL_HEADERS nobase_orte_HEADERS += $(headers) -ortedir = $(includedir)/openmpi/orte/mca/sds +ortedir = $(includedir)/openmpi/orte/mca/ess else ortedir = $(includedir) endif diff --git a/orte/mca/sds/alps/Makefile.am b/orte/mca/ess/alps/Makefile.am similarity index 73% rename from orte/mca/sds/alps/Makefile.am rename to orte/mca/ess/alps/Makefile.am index e3e128a998..9c2f3a31f5 100644 --- a/orte/mca/sds/alps/Makefile.am +++ b/orte/mca/ess/alps/Makefile.am @@ -9,7 +9,6 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008 UT-Battelle, LLC # $COPYRIGHT$ # # Additional copyrights may follow @@ -22,30 +21,30 @@ sources = \ - sds_alps.h \ - sds_alps_component.c \ - sds_alps_module.c + ess_alps.h \ + ess_alps_component.c \ + ess_alps_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_sds_alps_DSO +if OMPI_BUILD_ess_alps_DSO component_noinst = -component_install = mca_sds_alps.la +component_install = mca_ess_alps.la else -component_noinst = libmca_sds_alps.la +component_noinst = libmca_ess_alps.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sds_alps_la_SOURCES = $(sources) -mca_sds_alps_la_LDFLAGS = -module -avoid-version -mca_sds_alps_la_LIBADD = \ +mca_ess_alps_la_SOURCES = $(sources) +mca_ess_alps_la_LDFLAGS = -module -avoid-version +mca_ess_alps_la_LIBADD = \ $(top_ompi_builddir)/orte/libopen-rte.la \ $(top_ompi_builddir)/opal/libopen-pal.la noinst_LTLIBRARIES = $(component_noinst) -libmca_sds_alps_la_SOURCES =$(sources) -libmca_sds_alps_la_LDFLAGS = -module -avoid-version +libmca_ess_alps_la_SOURCES =$(sources) +libmca_ess_alps_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sds/alps/configure.m4 b/orte/mca/ess/alps/configure.m4 similarity index 83% rename from orte/mca/sds/alps/configure.m4 rename to orte/mca/ess/alps/configure.m4 index 417cbd36d4..6b4187c36b 100644 --- a/orte/mca/sds/alps/configure.m4 +++ b/orte/mca/ess/alps/configure.m4 @@ -10,7 +10,6 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
-# Copyright (c) 2008 UT-Battelle, LLC # $COPYRIGHT$ # # Additional copyrights may follow @@ -18,10 +17,10 @@ # $HEADER$ # -# MCA_sds_alps_CONFIG([action-if-found], [action-if-not-found]) +# MCA_ess_alps_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_sds_alps_CONFIG],[ - OMPI_CHECK_ALPS([sds_alps], +AC_DEFUN([MCA_ess_alps_CONFIG],[ + OMPI_CHECK_ALPS([ess_alps], [AC_CHECK_FUNC([cnos_get_rank], [$1], [$2])], [$2]) ])dnl diff --git a/orte/mca/ess/alps/configure.params b/orte/mca/ess/alps/configure.params new file mode 100644 index 0000000000..93a965d5a9 --- /dev/null +++ b/orte/mca/ess/alps/configure.params @@ -0,0 +1,30 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that, if we can build, +# only ALPS component will build. 
This is set higher +# than the CNOS component to ensure we don't get both +# since the ALPS component will -only- build if specifically +# ordered to do so - which means we don't want the CNOS one + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/oob/oob_types.h b/orte/mca/ess/alps/ess_alps.h similarity index 72% rename from orte/mca/oob/oob_types.h rename to orte/mca/ess/alps/ess_alps.h index 3e502cf4b2..305bc34f69 100644 --- a/orte/mca/oob/oob_types.h +++ b/orte/mca/ess/alps/ess_alps.h @@ -15,17 +15,20 @@ * * $HEADER$ */ -/** @file: - * - * the oob framework + +#ifndef ORTE_ESS_ALPS_H +#define ORTE_ESS_ALPS_H + +BEGIN_C_DECLS + +/* + * Module open / close */ +int orte_ess_alps_component_open(void); +int orte_ess_alps_component_close(void); +orte_ess_base_module_t* orte_ess_alps_component_init(int *priority); -#ifndef _MCA_OOB_TYPES_H_ -#define _MCA_OOB_TYPES_H_ -#include "orte_config.h" -#include "orte/orte_constants.h" +END_C_DECLS -#define ORTE_OOB_SUBSCRIPTION "orte-oob-sub" - -#endif /* MCA_OOB_TYPES_H */ +#endif /* ORTE_ESS_ALPS_H */ diff --git a/orte/mca/sds/singleton/sds_singleton_component.c b/orte/mca/ess/alps/ess_alps_component.c similarity index 69% rename from orte/mca/sds/singleton/sds_singleton_component.c rename to orte/mca/ess/alps/ess_alps_component.c index 95d4eba247..1543029aab 100644 --- a/orte/mca/sds/singleton/sds_singleton_component.c +++ b/orte/mca/ess/alps/ess_alps_component.c @@ -23,65 +23,66 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/singleton/sds_singleton.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_singleton_module; +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/alps/ess_alps.h" + +extern orte_ess_base_module_t orte_ess_alps_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t 
mca_sds_singleton_component = { +orte_ess_base_component_t mca_ess_alps_component = { /* First, the mca_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ - "singleton", + "alps", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_singleton_component_open, - orte_sds_singleton_component_close + orte_ess_alps_component_open, + orte_ess_alps_component_close }, /* Next the MCA v1.0.0 component meta data */ { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT + /* The component is not checkpoint ready */ + MCA_BASE_METADATA_PARAM_NONE }, /* Initialization / querying functions */ - orte_sds_singleton_component_init + orte_ess_alps_component_init }; int -orte_sds_singleton_component_open(void) +orte_ess_alps_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_singleton_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_alps_component_init(int *priority) { - *priority = 0; - return &orte_sds_singleton_module; + *priority = 35; + return &orte_ess_alps_module; } int -orte_sds_singleton_component_close(void) +orte_ess_alps_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c new file mode 100644 index 0000000000..70e22fd232 --- /dev/null +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "opal/util/show_help.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/alps/ess_alps.h" + +static int alps_set_name(void); + +static int rte_init(char flags); +static int rte_finalize(void); + + +orte_ess_base_module_t orte_ess_alps_module = { + rte_init, + rte_finalize, + orte_ess_base_app_abort +}; + + +static int rte_init(char flags) +{ + int ret; + char *error = NULL; + + /* Start by getting a unique name */ + alps_set_name(); + + /* if I am a daemon, complete my setup using the + * default procedure + */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_orted_setup"; + goto error; + } + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_tool_setup"; + goto error; + } + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_app_setup"; + goto error; + } + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +static int rte_finalize(void) +{ + int ret; + + /* if I am a daemon, 
finalize using the default procedure */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { + ORTE_ERROR_LOG(ret); + } + } + + return ret; +} + + +static int alps_set_name(void) +{ + int rc; + int id; + orte_jobid_t jobid; + orte_vpid_t starting_vpid; + char* jobid_string; + char* vpid_string; + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, + "ess:alps setting name")); + + id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL); + mca_base_param_lookup_string(id, &jobid_string); + if (NULL == jobid_string) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, jobid_string))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL); + mca_base_param_lookup_string(id, &vpid_string); + if (NULL == vpid_string) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&starting_vpid, vpid_string))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + ORTE_PROC_MY_NAME->jobid = jobid; + ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank() + starting_vpid; + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, + "ess:alps set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size(); + + return ORTE_SUCCESS; +} diff --git a/orte/mca/sds/base/Makefile.am b/orte/mca/ess/base/Makefile.am similarity index 72% rename from orte/mca/sds/base/Makefile.am rename to 
orte/mca/ess/base/Makefile.am index 087575182f..790466afba 100644 --- a/orte/mca/sds/base/Makefile.am +++ b/orte/mca/ess/base/Makefile.am @@ -16,16 +16,18 @@ # $HEADER$ # -dist_pkgdata_DATA += base/help-sds-base.txt +dist_pkgdata_DATA += base/help-ess-base.txt headers += \ base/base.h -libmca_sds_la_SOURCES += \ - base/sds_base_close.c \ - base/sds_base_open.c \ - base/sds_base_select.c \ - base/sds_base_interface.c \ - base/sds_base_universe.c \ - base/sds_base_get.c \ - base/sds_base_put.c +libmca_ess_la_SOURCES += \ + base/ess_base_close.c \ + base/ess_base_open.c \ + base/ess_base_select.c \ + base/ess_base_get.c \ + base/ess_base_put.c \ + base/ess_base_std_tool.c \ + base/ess_base_std_app.c \ + base/ess_base_std_orted.c + diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h new file mode 100644 index 0000000000..e4a5044b6b --- /dev/null +++ b/orte/mca/ess/base/base.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef MCA_ESS_BASE_H +#define MCA_ESS_BASE_H + +#include "orte_config.h" +#include "orte/types.h" + +#include "opal/mca/mca.h" +#include "orte/mca/ess/ess.h" + +BEGIN_C_DECLS + +/* + * Global functions for MCA overall collective open and close + */ + +/** + * Open the ess framework + */ +ORTE_DECLSPEC int orte_ess_base_open(void); + +/** + * Select a ess module + */ +ORTE_DECLSPEC int orte_ess_base_select(void); + +/** + * Close the ess framework + */ +ORTE_DECLSPEC int orte_ess_base_close(void); + +/* + * The verbose channel for debug output + */ +ORTE_DECLSPEC extern int orte_ess_base_output; + +/* + * Internal helper functions used by components + */ +ORTE_DECLSPEC int orte_ess_env_get(void); + +ORTE_DECLSPEC int orte_ess_base_app_setup(void); +ORTE_DECLSPEC int orte_ess_base_app_finalize(void); +ORTE_DECLSPEC void orte_ess_base_app_abort(int status, bool report) __opal_attribute_noreturn__; + +ORTE_DECLSPEC int orte_ess_base_tool_setup(void); +ORTE_DECLSPEC int orte_ess_base_tool_finalize(void); + +ORTE_DECLSPEC int orte_ess_base_orted_setup(void); +ORTE_DECLSPEC int orte_ess_base_orted_finalize(void); + + +/* + * Put functions + */ +ORTE_DECLSPEC int orte_ess_env_put(orte_std_cntr_t num_procs, + orte_std_cntr_t num_local_procs, + char ***env); + +ORTE_DECLSPEC extern opal_list_t orte_ess_base_components_available; + +END_C_DECLS + +#endif diff --git a/orte/mca/ess/base/ess_base_close.c b/orte/mca/ess/base/ess_base_close.c new file mode 100644 index 0000000000..29544b05b3 --- /dev/null +++ b/orte/mca/ess/base/ess_base_close.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_component_repository.h" +#include "opal/util/output.h" + +#include "orte/mca/ess/base/base.h" + +extern opal_list_t orte_ess_base_components_available; + +int +orte_ess_base_close(void) +{ + opal_list_item_t *item; + mca_base_component_list_item_t *cli; + + /* unload all remaining components */ + while (NULL != (item = opal_list_remove_first(&orte_ess_base_components_available))) { + orte_ess_base_component_t* component; + cli = (mca_base_component_list_item_t *) item; + component = (orte_ess_base_component_t *) cli->cli_component; + opal_output_verbose(10, 0, + "orte_ess_base_close: module %s unloaded", + component->ess_version.mca_component_name); + mca_base_component_repository_release((mca_base_component_t *) component); + OBJ_RELEASE(item); + } + + OBJ_DESTRUCT(&orte_ess_base_components_available); + return ORTE_SUCCESS; +} + diff --git a/orte/mca/sds/base/sds_base_get.c b/orte/mca/ess/base/ess_base_get.c similarity index 63% rename from orte/mca/sds/base/sds_base_get.c rename to orte/mca/ess/base/ess_base_get.c index c0de6b8bb8..6f0a479158 100644 --- a/orte/mca/sds/base/sds_base_get.c +++ b/orte/mca/ess/base/ess_base_get.c @@ -17,6 +17,7 @@ */ #include "orte_config.h" +#include "orte/constants.h" #ifdef HAVE_UNISTD_H #include @@ -27,21 +28,20 @@ #include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/mca/base/mca_base_param.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/base/base.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/ns/ns.h" -#include 
"orte/mca/errmgr/base/base.h" -int orte_sds_env_get(void) +#include "orte/mca/errmgr/errmgr.h" +#include "orte/util/proc_info.h" + +#include "orte/mca/ess/base/base.h" + +int orte_ess_env_get(void) { int num_procs; - int local_rank; - int num_local_procs; - int id; - id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1); - mca_base_param_lookup_int(id, &num_procs); + mca_base_param_reg_int_name("orte", "ess_num_procs", + "Used to discover the number of procs in the job", + true, false, -1, &num_procs); + if (num_procs < 0) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; @@ -52,17 +52,19 @@ int orte_sds_env_get(void) * to set it for orteds - so just set it to an invalid value which indicates * it wasn't found if it isn't there */ - id = mca_base_param_register_int("ns", "nds", "local_rank", NULL, ORTE_VPID_INVALID); - mca_base_param_lookup_int(id, &local_rank); - orte_process_info.local_rank = (orte_vpid_t)local_rank; + mca_base_param_reg_int_name("orte", "ess_local_rank", + "Used to discover the local rank of a process on a node", + true, false, (int)ORTE_VPID_INVALID, &num_procs); + orte_process_info.local_rank = (orte_vpid_t)num_procs; /* it is okay for this param not to be found - for example, we don't bother * to set it for orteds - so just set it to a value which indicates * it wasn't found if it isn't there */ - id = mca_base_param_register_int("ns", "nds", "num_local_procs", NULL, 0); - mca_base_param_lookup_int(id, &num_local_procs); - orte_process_info.num_local_procs = (orte_std_cntr_t)num_local_procs; + mca_base_param_reg_int_name("orte", "ess_num_local_procs", + "Used to discover the number of processes on a node", + true, false, -1, &num_procs); + orte_process_info.num_local_procs = (orte_std_cntr_t)num_procs; return ORTE_SUCCESS; } diff --git a/orte/mca/sds/base/sds_base_open.c b/orte/mca/ess/base/ess_base_open.c similarity index 68% rename from orte/mca/sds/base/sds_base_open.c rename to 
orte/mca/ess/base/ess_base_open.c index d3fbc1700c..2238a3a818 100644 --- a/orte/mca/sds/base/sds_base_open.c +++ b/orte/mca/ess/base/ess_base_open.c @@ -18,11 +18,13 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" + #include "opal/util/output.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" -#include "orte/mca/sds/base/base.h" + +#include "orte/mca/ess/base/base.h" /* @@ -31,20 +33,26 @@ * module's public mca_base_module_t struct. */ -#include "orte/mca/sds/base/static-components.h" +#include "orte/mca/ess/base/static-components.h" -opal_list_t orte_sds_base_components_available; -orte_sds_base_module_t *orte_sds_base_module = NULL; +opal_list_t orte_ess_base_components_available; +orte_ess_base_module_t orte_ess = { + NULL, + NULL +}; +int orte_ess_base_output; int -orte_sds_base_open(void) +orte_ess_base_open(void) { - OBJ_CONSTRUCT(&orte_sds_base_components_available, opal_list_t); + orte_ess_base_output = opal_output_open(NULL); + + OBJ_CONSTRUCT(&orte_ess_base_components_available, opal_list_t); /* Open up all available components */ if (ORTE_SUCCESS != - mca_base_components_open("sds", 0, mca_sds_base_static_components, - &orte_sds_base_components_available, + mca_base_components_open("ess", orte_ess_base_output, mca_ess_base_static_components, + &orte_ess_base_components_available, true)) { return ORTE_ERROR; } diff --git a/orte/mca/ess/base/ess_base_put.c b/orte/mca/ess/base/ess_base_put.c new file mode 100644 index 0000000000..805976f712 --- /dev/null +++ b/orte/mca/ess/base/ess_base_put.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. 
All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#include "opal/util/opal_environ.h" +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/ess/base/base.h"
+
+/**
+ * Add the parameters read back by the ESS "env" component to an
+ * environment.
+ *
+ * Sets the "ess" selection variable to "env", clears the "name"
+ * variable, and sets the "num_procs" and "num_local_procs" variables.
+ * The num_procs value is also exported as MPI_COMM_WORLD_SIZE.
+ *
+ * @param num_procs        value stored for "num_procs"
+ * @param num_local_procs  value stored for "num_local_procs"
+ * @param env              environment array to be modified
+ *
+ * @retval ORTE_SUCCESS on success
+ * @retval ORTE_ERR_OUT_OF_RESOURCE if a variable name or value string
+ *         could not be created
+ */
+int orte_ess_env_put(orte_std_cntr_t num_procs,
+                     orte_std_cntr_t num_local_procs,
+                     char ***env)
+{
+    char* param;
+    char* value;
+
+    /* tell the ESS to select the env component */
+    if(NULL == (param = mca_base_param_environ_variable("ess",NULL,NULL))) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    opal_setenv(param, "env", true, env);
+    free(param);
+
+    /* since we want to pass the name as separate components, make sure
+     * that the "name" environmental variable is cleared!
+     */
+    if(NULL == (param = mca_base_param_environ_variable("orte","ess","name"))) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    opal_unsetenv(param, env);
+    free(param);
+
+    /* asprintf leaves value undefined on failure, so it must be checked
+     * before value is used or freed
+     */
+    if (0 > asprintf(&value, "%ld", (long) num_procs)) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    if(NULL == (param = mca_base_param_environ_variable("orte","ess","num_procs"))) {
+        free(value);  /* do not leak the value string on the error path */
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    opal_setenv(param, value, true, env);
+    free(param);
+    /* although the num_procs is the comm_world size, users
+     * would appreciate being given a public environmental variable
+     * that also represents this value - something MPI specific - so
+     * do that here.
+     *
+     * AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
+     * We know - just live with it
+     */
+    opal_setenv("MPI_COMM_WORLD_SIZE", value, true, env);
+    free(value);
+
+    if (0 > asprintf(&value, "%ld", (long) num_local_procs)) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    if(NULL == (param = mca_base_param_environ_variable("orte","ess","num_local_procs"))) {
+        free(value);  /* do not leak the value string on the error path */
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    opal_setenv(param, value, true, env);
+    free(param);
+    free(value);
+
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/sds/base/sds_base_select.c b/orte/mca/ess/base/ess_base_select.c similarity index 57% rename from orte/mca/sds/base/sds_base_select.c rename to orte/mca/ess/base/ess_base_select.c index 28e38c5179..82b09e0282 100644 --- a/orte/mca/sds/base/sds_base_select.c +++ b/orte/mca/ess/base/ess_base_select.c @@ -17,53 +17,53 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include "opal/class/opal_list.h" #include "opal/util/output.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_component_repository.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/base/base.h" -extern opal_list_t orte_sds_base_components_available; -extern orte_sds_base_module_t *orte_sds_base_module; +#include "orte/mca/ess/base/base.h" + +extern opal_list_t orte_ess_base_components_available; int -orte_sds_base_select(void) +orte_ess_base_select(void) { opal_list_item_t *item; mca_base_component_list_item_t *cli; int selected_priority = -1; - orte_sds_base_component_t *selected_component = NULL; - orte_sds_base_module_t *selected_module = NULL; + orte_ess_base_component_t *selected_component = NULL; + orte_ess_base_module_t *selected_module = NULL; /* Traverse the list of opened modules; call their init functions.
*/ - for(item = opal_list_get_first(&orte_sds_base_components_available); - item != opal_list_get_end(&orte_sds_base_components_available); + for(item = opal_list_get_first(&orte_ess_base_components_available); + item != opal_list_get_end(&orte_ess_base_components_available); item = opal_list_get_next(item)) { - orte_sds_base_component_t* component; + orte_ess_base_component_t* component; cli = (mca_base_component_list_item_t *) item; - component = (orte_sds_base_component_t *) cli->cli_component; + component = (orte_ess_base_component_t *) cli->cli_component; - opal_output_verbose(10, 0, - "orte_sds_base_select: initializing %s component %s", - component->sds_version.mca_type_name, - component->sds_version.mca_component_name); + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: initializing %s component %s", + component->ess_version.mca_type_name, + component->ess_version.mca_component_name); - if (NULL == component->sds_init) { - opal_output_verbose(10, 0, - "orte_sds_base_select: no init function; ignoring component"); + if (NULL == component->ess_init) { + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: no init function; ignoring component"); } else { int priority; - orte_sds_base_module_t* module = component->sds_init(&priority); + orte_ess_base_module_t* module = component->ess_init(&priority); /* If the component didn't initialize, remove it from the opened list and remove it from the component repository */ if (NULL == module) { - opal_output_verbose(10, 0, - "orte_sds_base_select: init returned failure"); + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: init returned failure"); continue; } @@ -80,18 +80,18 @@ orte_sds_base_select(void) } /* unload all components that were not selected */ - item = opal_list_get_first(&orte_sds_base_components_available); - while(item != opal_list_get_end(&orte_sds_base_components_available)) { + item = opal_list_get_first(&orte_ess_base_components_available); 
+ while(item != opal_list_get_end(&orte_ess_base_components_available)) { opal_list_item_t* next = opal_list_get_next(item); - orte_sds_base_component_t* component; + orte_ess_base_component_t* component; cli = (mca_base_component_list_item_t *) item; - component = (orte_sds_base_component_t *) cli->cli_component; + component = (orte_ess_base_component_t *) cli->cli_component; if(component != selected_component) { - opal_output_verbose(10, 0, - "orte_sds_base_select: module %s unloaded", - component->sds_version.mca_component_name); + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: module %s unloaded", + component->ess_version.mca_component_name); mca_base_component_repository_release((mca_base_component_t *) component); - opal_list_remove_item(&orte_sds_base_components_available, item); + opal_list_remove_item(&orte_ess_base_components_available, item); OBJ_RELEASE(item); } item = next; @@ -99,7 +99,10 @@ orte_sds_base_select(void) /* setup reference to selected module */ if(NULL != selected_module) { - orte_sds_base_module = selected_module; + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: module %s selected", + selected_component->ess_version.mca_component_name); + orte_ess = *selected_module; } return ORTE_SUCCESS; } diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c new file mode 100644 index 0000000000..16c2110817 --- /dev/null +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. 
+ * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/util/os_path.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_cr.h" + +#include "orte/mca/rml/base/base.h" +#include "orte/mca/routed/base/base.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/grpcomm/base/base.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/filem/base/base.h" +#include "orte/mca/iof/base/base.h" +#if OPAL_ENABLE_FT == 1 +#include "orte/mca/snapc/base/base.h" +#endif +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/util/session_dir.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_cr.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_wait.h" + +#include "orte/mca/ess/base/base.h" + +int orte_ess_base_app_setup(void) +{ + int ret; + char *error = NULL; + char *jobid_str, *procid_str; + + /* Setup the communication infrastructure */ + + /* Runtime Messaging Layer */ + if (ORTE_SUCCESS != (ret = orte_rml_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_select"; + goto error; + } + /* Routed system */ + if (ORTE_SUCCESS != (ret = orte_routed_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_select"; + goto error; + } + + /* + * Group communications + */ + if (ORTE_SUCCESS != (ret = orte_grpcomm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_open"; + goto error; + } + if 
(ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_select"; + goto error; + } + + /* although only the HNP and orteds open/select the PLM, everyone + * else has access to the PLM env proxy. + * We now provide a chance for the PLM + * to perform any module-specific init functions - non-HNP/orted + * procs will simply perform the PLM proxy init + */ + if (ORTE_SUCCESS != (ret = orte_plm.init())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_init"; + goto error; + } + + /* enable communication via the rml */ + if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml.enable_comm"; + goto error; + } + + /* setup my session directory */ + if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobid_str, ORTE_PROC_MY_NAME->jobid))) { + ORTE_ERROR_LOG(ret); + error = "orte_convert_jobid_to_string"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_util_convert_vpid_to_string(&procid_str, ORTE_PROC_MY_NAME->vpid))) { + ORTE_ERROR_LOG(ret); + error = "orte_convert_vpid_to_string"; + goto error; + } + + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, + "%s setting up session dir with\n\ttmpdir: %s\n\tuser %s\n\thost %s\n\tjobid %s\n\tprocid %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == orte_process_info.tmpdir_base) ? 
"UNDEF" : orte_process_info.tmpdir_base, + orte_system_info.user, orte_system_info.nodename, jobid_str, procid_str)); + + if (ORTE_SUCCESS != (ret = orte_session_dir(true, + orte_process_info.tmpdir_base, + orte_system_info.user, + orte_system_info.nodename, NULL, + jobid_str, procid_str))) { + if (jobid_str != NULL) free(jobid_str); + if (procid_str != NULL) free(procid_str); + ORTE_ERROR_LOG(ret); + error = "orte_session_dir"; + goto error; + } + if (NULL != jobid_str) { + free(jobid_str); + } + if (NULL != procid_str) { + free(procid_str); + } + + /* Once the session directory location has been established, set + the opal_output env file location to be in the + proc-specific session directory. */ + opal_output_set_output_file_info(orte_process_info.proc_session_dir, + "output-", NULL, NULL); + + + /* setup the routed info - the selected routed component + * will know what to do. Some may put us in a blocking + * receive here so they can get ALL of the contact info + * from our peers. Others may just find the local daemon's + * contact info and immediately return. 
+ */ + if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_routed.init_routes"; + goto error; + } + + /* + * setup I/O forwarding system - must come after we init routes + * so we can get our HNP's name set + */ + if (ORTE_SUCCESS != (ret = orte_iof_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_iof_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_select"; + goto error; + } + + +#if OPAL_ENABLE_FT == 1 + /* + * Setup the SnapC + */ + if (ORTE_SUCCESS != (ret = orte_snapc_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_select"; + goto error; + } + + /* apps need the OPAL CR stuff */ + opal_cr_set_enabled(true); +#else + opal_cr_set_enabled(false); +#endif + + /* Initalize the CR setup + * Note: Always do this, even in non-FT builds. + * If we don't some user level tools may hang. 
+ */ + if (ORTE_SUCCESS != (ret = orte_cr_init())) { + ORTE_ERROR_LOG(ret); + error = "orte_cr_init"; + goto error; + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +int orte_ess_base_app_finalize(void) +{ + orte_cr_finalize(); + +#if OPAL_ENABLE_FT == 1 + orte_snapc_base_close(); +#endif + orte_filem_base_close(); + + orte_wait_finalize(); + orte_iof_base_close(); + + /* now can close the rml and its friendly group comm */ + orte_grpcomm_base_close(); + orte_routed_base_close(); + orte_rml_base_close(); + + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} + +/* + * We do NOT call the regular C-library "abort" function, even + * though that would have alerted us to the fact that this is + * an abnormal termination, because it would automatically cause + * a core file to be generated. On large systems, that can be + * overwhelming (imagine a few thousand Gbyte-sized files hitting + * a shared file system simultaneously...ouch!). + * + * However, this causes a problem for OpenRTE as the system truly + * needs to know that this actually IS an abnormal termination. + * To get around the problem, we create a file in the session + * directory - we don't need to put anything in it, though, as its + * very existence simply alerts us that this was an abnormal + * termination. + * + * The session directory finalize system will clean this file up + * for us automagically. However, it needs to stick around long + * enough for our local daemon to find it! So, we do NOT call + * session_dir_finalize here!!! Someone will clean up for us. + * + * In some cases, however, we DON'T want to create that alert. For + * example, if an orted detects that the HNP has died, then there + * is truly nobody to alert! 
In these cases, we pass report=false
+ * to prevent the abort file from being created. This allows the
+ * session directory tree to cleanly be eliminated.
+ */
+void orte_ess_base_app_abort(int status, bool report)
+{
+    char *abort_file;
+    int fd;
+
+    /* Exit - do NOT do a normal finalize as this will very likely
+     * hang the process. We are aborting due to an abnormal condition
+     * that precludes normal cleanup
+     *
+     * We do need to do the following bits to make sure we leave a
+     * clean environment. Taken from orte_finalize():
+     * - Assume errmgr cleans up child processes before we exit.
+     */
+
+    /* CRS cleanup since it may have a named pipe and thread active */
+    orte_cr_finalize();
+
+    /* If we were asked to report this termination,
+     * write an "abort" file into our session directory
+     */
+    if (report) {
+        abort_file = opal_os_path(false, orte_process_info.proc_session_dir, "abort", NULL);
+        if (NULL == abort_file) {
+            /* got a problem */
+            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+            goto CLEANUP;
+        }
+        OPAL_OUTPUT_VERBOSE((5, orte_debug_output,
+                             "%s orte_ess_app_abort: dropping abort file %s",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), abort_file));
+        fd = open(abort_file, O_CREAT, 0600);
+        if (0 <= fd) close(fd);  /* open() fails with -1; 0 is a valid descriptor */
+    }
+
+CLEANUP:
+    /* - Clean out the global structures
+     * (not really necessary, but good practice) */
+    orte_sys_info_finalize();
+    orte_proc_info_finalize();
+
+    /* Now Exit */
+    exit(status);
+}
diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c new file mode 100644 index 0000000000..7b14e1961f --- /dev/null +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_cr.h" + +#include "orte/mca/rml/base/base.h" +#include "orte/mca/routed/base/base.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/grpcomm/base/base.h" +#include "orte/mca/iof/base/base.h" +#include "orte/mca/plm/base/base.h" +#include "orte/mca/odls/base/base.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/base.h" +#if OPAL_ENABLE_FT == 1 +#include "orte/mca/snapc/base/base.h" +#endif +#include "orte/mca/filem/base/base.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/util/session_dir.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_cr.h" +#include "orte/runtime/orte_wait.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/base/base.h" + +int orte_ess_base_orted_setup(void) +{ + int ret; + char *error = NULL; + char *jobid_str, *procid_str; + + /* if I am a daemon, I still need to open and select the + * the PLM so I can do local spawns, if permitted + */ + if (ORTE_SUCCESS != (ret = orte_plm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_select"; + goto error; + } + + /* Setup the communication infrastructure */ + + /* Runtime Messaging Layer */ + if (ORTE_SUCCESS != (ret = orte_rml_base_open())) { + 
ORTE_ERROR_LOG(ret); + error = "orte_rml_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_select"; + goto error; + } + /* Routed system */ + if (ORTE_SUCCESS != (ret = orte_routed_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_select"; + goto error; + } + /* + * Group communications + */ + if (ORTE_SUCCESS != (ret = orte_grpcomm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_select"; + goto error; + } + + /* Now provide a chance for the PLM + * to perform any module-specific init functions. This + * needs to occur AFTER the communications are setup + * as it may involve starting a non-blocking recv + */ + if (ORTE_SUCCESS != (ret = orte_plm.init())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_init"; + goto error; + } + + /* Open/select the odls */ + if (ORTE_SUCCESS != (ret = orte_odls_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_odls_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_odls_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_odls_base_select"; + goto error; + } + + /* enable communication with the rml */ + if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml.enable_comm"; + goto error; + } + + /* setup my session directory */ + if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobid_str, ORTE_PROC_MY_NAME->jobid))) { + ORTE_ERROR_LOG(ret); + error = "orte_convert_jobid_to_string"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_util_convert_vpid_to_string(&procid_str, ORTE_PROC_MY_NAME->vpid))) { + ORTE_ERROR_LOG(ret); + error = "orte_convert_vpid_to_string"; + goto 
error; + } + + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, + "%s setting up session dir with\n\ttmpdir: %s\n\tuser %s\n\thost %s\n\tjobid %s\n\tprocid %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, + orte_system_info.user, orte_system_info.nodename, jobid_str, procid_str)); + + if (ORTE_SUCCESS != (ret = orte_session_dir(true, + orte_process_info.tmpdir_base, + orte_system_info.user, + orte_system_info.nodename, NULL, + jobid_str, procid_str))) { + if (jobid_str != NULL) free(jobid_str); + if (procid_str != NULL) free(procid_str); + ORTE_ERROR_LOG(ret); + error = "orte_session_dir"; + goto error; + } + if (NULL != jobid_str) { + free(jobid_str); + } + if (NULL != procid_str) { + free(procid_str); + } + + /* setup the routed info - the selected routed component + * will know what to do. + */ + if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_routed.init_routes"; + goto error; + } + + /* + * setup I/O forwarding system - must come after we init routes */ + if (ORTE_SUCCESS != (ret = orte_iof_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_iof_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_select"; + goto error; + } + + /* setup the FileM */ + if (ORTE_SUCCESS != (ret = orte_filem_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_filem_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_filem_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_filem_base_select"; + goto error; + } + +#if OPAL_ENABLE_FT == 1 + /* + * Setup the SnapC + */ + if (ORTE_SUCCESS != (ret = orte_snapc_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + ORTE_ERROR_LOG(ret); + 
error = "orte_snapc_base_select"; + goto error; + } + + /* For daemons, ORTE doesn't need the OPAL CR stuff */ + opal_cr_set_enabled(false); +#else + opal_cr_set_enabled(false); +#endif + + /* + * Initalize the CR setup + * Note: Always do this, even in non-FT builds. + * If we don't some user level tools may hang. + */ + if (ORTE_SUCCESS != (ret = orte_cr_init())) { + ORTE_ERROR_LOG(ret); + error = "orte_cr_init"; + goto error; + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +int orte_ess_base_orted_finalize(void) +{ + orte_cr_finalize(); + +#if OPAL_ENABLE_FT == 1 + orte_snapc_base_close(); +#endif + orte_filem_base_close(); + + orte_odls_base_close(); + + orte_wait_finalize(); + orte_iof_base_close(); + + /* finalize selected modules so they can de-register + * any receives + */ + orte_plm_base_close(); + orte_errmgr_base_close(); + + /* now can close the rml and its friendly group comm */ + orte_grpcomm_base_close(); + orte_routed_base_close(); + orte_rml_base_close(); + + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c new file mode 100644 index 0000000000..96e02f90b3 --- /dev/null +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_cr.h" + +#include "orte/mca/rml/base/base.h" +#include "orte/mca/routed/base/base.h" +#include "orte/mca/errmgr/errmgr.h" +#if OPAL_ENABLE_FT == 1 +#include "orte/mca/snapc/base/base.h" +#endif +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/util/session_dir.h" +#include "orte/runtime/orte_cr.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/base/base.h" + +int orte_ess_base_tool_setup(void) +{ + int ret; + char *error = NULL; + + /* Setup the communication infrastructure */ + + /* Runtime Messaging Layer */ + if (ORTE_SUCCESS != (ret = orte_rml_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_select"; + goto error; + } + /* Routed system */ + if (ORTE_SUCCESS != (ret = orte_routed_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_select"; + goto error; + } + + /* since I am a tool, then all I really want to do is communicate. 
+ * So setup communications and be done - finding the HNP + * to which I want to communicate and setting up a route for + * that link is my responsibility + */ + + /* enable communication via the rml */ + if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml.enable_comm"; + goto error; + } + /* we -may- need to know the name of the head + * of our session directory tree, particularly the + * tmp base where any other session directories on + * this node might be located + */ + if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(NULL, + &orte_process_info.tmpdir_base, + &orte_process_info.top_session_dir, + orte_system_info.user, + orte_system_info.nodename, NULL, + NULL, NULL))) { + ORTE_ERROR_LOG(ret); + error = "define session dir names"; + goto error; + } + +#if OPAL_ENABLE_FT == 1 + /* + * Setup the SnapC + */ + if (ORTE_SUCCESS != (ret = orte_snapc_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_select"; + goto error; + } + + /* Tools do not need all the OPAL CR stuff */ + opal_cr_set_enabled(false); +#endif + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +int orte_ess_base_tool_finalize(void) +{ + /* if I am a tool, then all I will have done is + * a very small subset of orte_init - ensure that + * I only back those elements out + */ + orte_routed_base_close(); + orte_rml_base_close(); + + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} diff --git a/orte/mca/sds/base/help-sds-base.txt b/orte/mca/ess/base/help-ess-base.txt similarity index 98% rename from 
orte/mca/sds/base/help-sds-base.txt rename to orte/mca/ess/base/help-ess-base.txt index 6bc6a85ec2..5e315c74ec 100644 --- a/orte/mca/sds/base/help-sds-base.txt +++ b/orte/mca/ess/base/help-ess-base.txt @@ -18,7 +18,7 @@ # # This is the US/English general help file for the SDS base. # -[sds-base:execv-error] +[ess-base:execv-error] The singleton application was not able to find the executable "orted" in your PATH or in the directory where Open MPI/OpenRTE was initially installed, and therefore cannot continue. diff --git a/orte/mca/errmgr/hnp/Makefile.am b/orte/mca/ess/cnos/Makefile.am similarity index 74% rename from orte/mca/errmgr/hnp/Makefile.am rename to orte/mca/ess/cnos/Makefile.am index 4faeeb8a29..53badad39b 100644 --- a/orte/mca/errmgr/hnp/Makefile.am +++ b/orte/mca/ess/cnos/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - errmgr_hnp.h \ - errmgr_hnp_component.c \ - errmgr_hnp.c + ess_cnos.h \ + ess_cnos_component.c \ + ess_cnos_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_errmgr_hnp_DSO +if OMPI_BUILD_ess_cnos_DSO component_noinst = -component_install = mca_errmgr_hnp.la +component_install = mca_ess_cnos.la else -component_noinst = libmca_errmgr_hnp.la +component_noinst = libmca_ess_cnos.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_errmgr_hnp_la_SOURCES = $(sources) -mca_errmgr_hnp_la_LDFLAGS = -module -avoid-version +mca_ess_cnos_la_SOURCES = $(sources) +mca_ess_cnos_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_errmgr_hnp_la_SOURCES =$(sources) -libmca_errmgr_hnp_la_LDFLAGS = -module -avoid-version +libmca_ess_cnos_la_SOURCES =$(sources) +libmca_ess_cnos_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sds/cnos/configure.m4 b/orte/mca/ess/cnos/configure.m4 similarity index 67% rename from orte/mca/sds/cnos/configure.m4 rename to orte/mca/ess/cnos/configure.m4 index c3216b9d71..b3db391163 100644 --- a/orte/mca/sds/cnos/configure.m4 +++ b/orte/mca/ess/cnos/configure.m4 @@ -17,9 +17,14 @@ # $HEADER$ # -# MCA_sds_cnos_CONFIG([action-if-found], [action-if-not-found]) +# MCA_ess_cnos_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_sds_cnos_CONFIG],[ +AC_DEFUN([MCA_ess_cnos_CONFIG],[ # check for cnos functions - AC_CHECK_FUNC([cnos_get_rank], [$1], [$2]) + # a bit of a hack... we don't want ess_cnos if alps + # was requested, and we can't rely on build priority because + # ess_alps uses priority 10 so that ess_hnp is built as well.
+ AC_CHECK_FUNC([cnos_get_rank], + [OMPI_CHECK_ALPS([ess_cnos], [$2], [$1])], + [$2]) ])dnl diff --git a/orte/mca/ras/dash_host/configure.params b/orte/mca/ess/cnos/configure.params similarity index 88% rename from orte/mca/ras/dash_host/configure.params rename to orte/mca/ess/cnos/configure.params index e959acf694..4e4404321c 100644 --- a/orte/mca/ras/dash_host/configure.params +++ b/orte/mca/ess/cnos/configure.params @@ -19,7 +19,10 @@ # $HEADER$ # -# Specific to this module - - PARAM_CONFIG_FILES="Makefile" + +# +# Set the config priority so that, if we can build, +# only CNOS component(s) will build + +PARAM_CONFIG_PRIORITY=30 diff --git a/orte/util/universe_setup_file_io.h b/orte/mca/ess/cnos/ess_cnos.h similarity index 66% rename from orte/util/universe_setup_file_io.h rename to orte/mca/ess/cnos/ess_cnos.h index 40f61817e3..89dae1240f 100644 --- a/orte/util/universe_setup_file_io.h +++ b/orte/mca/ess/cnos/ess_cnos.h @@ -1,5 +1,4 @@ /* - * * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
@@ -15,19 +14,21 @@ * Additional copyrights may follow * * $HEADER$ - * - * $Id: ompi_universe_setup_file I/O functions $ - * */ -#ifndef ORTE_UNIV_SETUP_FILE_IO_H -#define ORTE_UNIV_SETUP_FILE_IO_H +#ifndef ORTE_ESS_CNOS_H +#define ORTE_ESS_CNOS_H -#include "orte_config.h" -#include "orte/util/univ_info.h" +BEGIN_C_DECLS -ORTE_DECLSPEC int orte_write_universe_setup_file(char *filename, orte_universe_t *info); +/* + * Module open / close + */ +int orte_ess_cnos_component_open(void); +int orte_ess_cnos_component_close(void); +orte_ess_base_module_t* orte_ess_cnos_component_init(int *priority); -ORTE_DECLSPEC int orte_read_universe_setup_file(char *filename, orte_universe_t *info); -#endif +END_C_DECLS + +#endif /* ORTE_ESS_CNOS_H */ diff --git a/orte/mca/sds/cnos/sds_cnos_component.c b/orte/mca/ess/cnos/ess_cnos_component.c similarity index 63% rename from orte/mca/sds/cnos/sds_cnos_component.c rename to orte/mca/ess/cnos/ess_cnos_component.c index f54d4eab3f..c223c579fd 100644 --- a/orte/mca/sds/cnos/sds_cnos_component.c +++ b/orte/mca/ess/cnos/ess_cnos_component.c @@ -9,7 +9,6 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2008 UT-Battelle, LLC * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,25 +23,24 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/cnos/sds_cnos.h" -#include "opal/mca/base/mca_base_param.h" +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/cnos/ess_cnos.h" -extern orte_sds_base_module_t orte_sds_cnos_module; +extern orte_ess_base_module_t orte_ess_cnos_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_cnos_component = { +orte_ess_base_component_t mca_ess_cnos_component = { /* First, the mca_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ "cnos", @@ -51,8 +49,8 @@ orte_sds_base_component_t mca_sds_cnos_component = { ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_cnos_component_open, - orte_sds_cnos_component_close + orte_ess_cnos_component_open, + orte_ess_cnos_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -62,41 +60,33 @@ orte_sds_base_component_t mca_sds_cnos_component = { }, /* Initialization / querying functions */ - orte_sds_cnos_component_init + orte_ess_cnos_component_init }; int -orte_sds_cnos_component_open(void) +orte_ess_cnos_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_cnos_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_cnos_component_init(int *priority) { - int id; - char *mode; - - /* okay, not seed/singleton attempt another approach */ - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - 
mca_base_param_lookup_string(id, &mode); - - /* if mode isn't NULL, then we have an ORTE starter. Don't use - this component */ - if (NULL != mode) { - free(mode); - return NULL; - } - - *priority = 30; - return &orte_sds_cnos_module; + /* since we are not launched by an ORTE launcher, + * we want to be selected ahead of the singleton + * component if we detect our supported environment. + * So ensure that our priority is higher than + * the singleton's + */ + *priority = 60; + return &orte_ess_cnos_module; } int -orte_sds_cnos_component_close(void) +orte_ess_cnos_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/cnos/ess_cnos_module.c b/orte/mca/ess/cnos/ess_cnos_module.c new file mode 100644 index 0000000000..e68e6f986e --- /dev/null +++ b/orte/mca/ess/cnos/ess_cnos_module.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "opal/util/output.h" + +#include "orte/mca/errmgr/base/base.h" +#include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/runtime/orte_globals.h" +#include "orte/mca/grpcomm/base/base.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/cnos/ess_cnos.h" + +static int rte_init(char flags); +static int rte_finalize(void); +static void rte_abort(int status, bool report) __opal_attribute_noreturn__; + +orte_ess_base_module_t orte_ess_cnos_module = { + rte_init, + rte_finalize, + rte_abort +}; + +static int rte_init(char flags) +{ + int rc; + + /* Get our process information */ + + /* Procs in this environment are directly launched. Hence, there + * was no mpirun to create a jobid for us, and each app proc is + * going to have to fend for itself. For now, we assume that the + * jobid is some arbitrary number (say, 1). 
+ */ + ORTE_PROC_MY_NAME->jobid = 1; + + /* find our vpid from cnos */ + ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank(); + + /* Get the number of procs in the job from cnos */ + orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size(); + + /* MPI_Init needs the grpcomm framework, so we have to init it */ + if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_grpcomm_base_select())) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* that's all we need here */ + return ORTE_SUCCESS; +} + + +static int rte_finalize(void) +{ + /* just cleanup the things we used */ + orte_grpcomm_base_close(); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} + +static void rte_abort(int status, bool report) +{ + exit(status); +} diff --git a/orte/mca/gpr/configure.m4 b/orte/mca/ess/configure.m4 similarity index 82% rename from orte/mca/gpr/configure.m4 rename to orte/mca/ess/configure.m4 index 9d8f01f55a..d23bff8708 100644 --- a/orte/mca/gpr/configure.m4 +++ b/orte/mca/ess/configure.m4 @@ -10,4 +10,4 @@ dnl $HEADER$ dnl dnl we only want same priority level components -m4_define(MCA_gpr_CONFIGURE_MODE, STOP_AT_FIRST_PRIORITY) +m4_define(MCA_ess_CONFIGURE_MODE, STOP_AT_FIRST_PRIORITY) diff --git a/orte/mca/pls/poe/Makefile.am b/orte/mca/ess/env/Makefile.am similarity index 75% rename from orte/mca/pls/poe/Makefile.am rename to orte/mca/ess/env/Makefile.am index ce436bd911..73491cad02 100644 --- a/orte/mca/pls/poe/Makefile.am +++ b/orte/mca/ess/env/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - pls_poe.h \ - pls_poe_component.c \ - pls_poe_module.c + ess_env.h \ + ess_env_component.c \ + ess_env_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_pls_poe_DSO +if OMPI_BUILD_ess_env_DSO component_noinst = -component_install = mca_pls_poe.la +component_install = mca_ess_env.la else -component_noinst = libmca_pls_poe.la +component_noinst = libmca_ess_env.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_pls_poe_la_SOURCES = $(sources) -mca_pls_poe_la_LDFLAGS = -module -avoid-version +mca_ess_env_la_SOURCES = $(sources) +mca_ess_env_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_pls_poe_la_SOURCES =$(sources) -libmca_pls_poe_la_LDFLAGS = -module -avoid-version +libmca_ess_env_la_SOURCES =$(sources) +libmca_ess_env_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/ess/env/configure.m4 b/orte/mca/ess/env/configure.m4 new file mode 100644 index 0000000000..99fef655c0 --- /dev/null +++ b/orte/mca/ess/env/configure.m4 @@ -0,0 +1,13 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Sandia National Laboratories. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ess_env_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_ess_env_CONFIG], [$1]) diff --git a/orte/mca/gpr/proxy/configure.params b/orte/mca/ess/env/configure.params similarity index 91% rename from orte/mca/gpr/proxy/configure.params rename to orte/mca/ess/env/configure.params index 0d1335d6d3..c410dd1eaf 100644 --- a/orte/mca/gpr/proxy/configure.params +++ b/orte/mca/ess/env/configure.params @@ -19,7 +19,9 @@ # $HEADER$ # -# Specific to this module - PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that we build +# whenever someone using daemons does + PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/ess/env/ess_env.h b/orte/mca/ess/env/ess_env.h new file mode 100644 index 0000000000..15fe9d73ef --- /dev/null +++ b/orte/mca/ess/env/ess_env.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_ESS_ENV_H +#define ORTE_ESS_ENV_H + +BEGIN_C_DECLS + +/* + * Module open / close + */ +int orte_ess_env_component_open(void); +int orte_ess_env_component_close(void); +orte_ess_base_module_t* orte_ess_env_component_init(int *priority); + + +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_env_component; + +END_C_DECLS + +#endif /* ORTE_ESS_ENV_H */ diff --git a/orte/mca/sds/slurm/sds_slurm_component.c b/orte/mca/ess/env/ess_env_component.c similarity index 58% rename from orte/mca/sds/slurm/sds_slurm_component.c rename to orte/mca/ess/env/ess_env_component.c index 4851cb35a8..901fb6d553 100644 --- a/orte/mca/sds/slurm/sds_slurm_component.c +++ b/orte/mca/ess/env/ess_env_component.c @@ -23,33 +23,36 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/slurm/sds_slurm.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_slurm_module; +#include "orte/util/proc_info.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/env/ess_env.h" + +extern orte_ess_base_module_t orte_ess_env_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_slurm_component = { +orte_ess_base_component_t mca_ess_env_component = { { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ - "slurm", + "env", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_slurm_component_open, - orte_sds_slurm_component_close + orte_ess_env_component_open, + orte_ess_env_component_close }, /* Next the MCA v1.0.0 
component meta data */ @@ -59,49 +62,46 @@ orte_sds_base_component_t mca_sds_slurm_component = { }, /* Initialization / querying functions */ - orte_sds_slurm_component_init + orte_ess_env_component_init }; int -orte_sds_slurm_component_open(void) +orte_ess_env_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_slurm_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_env_component_init(int *priority) { - int id; - char *slurm_nodeid, *mode; - /* okay, not seed/singleton - see if we're running under slurm */ - slurm_nodeid = getenv("SLURM_NODEID"); - if (NULL == slurm_nodeid) { - return NULL; + /* we are the env module, so set the priority to + * be higher than the tool component so that a + * tool launched as a distributed set of procs + * (i.e., a "tool with name") will select this + * module, but low enough that any other environment + * will override us + */ + + /* if we don't have a path back to the HNP, then we + * were not launched by mpirun, so don't pick us as + * it would be impossible for the correct env vars + * to have been set! 
+ */ + if (NULL != orte_process_info.my_hnp_uri) { + *priority = 20; + return &orte_ess_env_module; } - - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - mca_base_param_lookup_string(id, &mode); - - if (NULL == mode || 0 != strcmp("slurm", mode)) { - if(NULL != mode) { - free(mode); - } - return NULL; - } - - if(NULL != mode) { - free(mode); - } - *priority = 20; - return &orte_sds_slurm_module; + + /* if not, then return NULL - we cannot be selected */ + return NULL; } int -orte_sds_slurm_component_close(void) +orte_ess_env_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c new file mode 100644 index 0000000000..cd444306d2 --- /dev/null +++ b/orte/mca/ess/env/ess_env_module.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+#include
+#include
+#ifdef HAVE_FCNTL_H
+#include
+#endif
+#ifdef HAVE_UNISTD_H
+#include
+#endif
+
+#include "opal/event/event.h"
+#include "opal/util/output.h"
+#include "opal/util/show_help.h"
+#include "opal/threads/mutex.h"
+#include "opal/runtime/opal.h"
+#include "opal/runtime/opal_cr.h"
+
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "opal/util/os_path.h"
+#include "opal/util/cmd_line.h"
+#include "opal/util/malloc.h"
+
+#include "orte/mca/rml/base/base.h"
+#include "orte/mca/rml/base/rml_contact.h"
+#include "orte/mca/routed/base/base.h"
+#include "orte/mca/routed/routed.h"
+#include "orte/mca/errmgr/base/base.h"
+#include "orte/mca/grpcomm/base/base.h"
+#include "orte/mca/iof/base/base.h"
+#include "orte/mca/ess/base/base.h"
+#include "orte/mca/ess/ess.h"
+#include "orte/mca/ras/base/base.h"
+#include "orte/mca/plm/base/base.h"
+#include "orte/mca/odls/base/base.h"
+
+#include "orte/mca/rmaps/base/base.h"
+#if OPAL_ENABLE_FT == 1
+#include "orte/mca/snapc/base/base.h"
+#endif
+#include "orte/mca/filem/base/base.h"
+#include "orte/util/proc_info.h"
+#include "orte/util/session_dir.h"
+#include "orte/util/sys_info.h"
+#include "orte/util/hnp_contact.h"
+#include "orte/util/name_fns.h"
+
+#include "orte/runtime/runtime.h"
+#include "orte/runtime/orte_wait.h"
+#include "orte/runtime/orte_globals.h"
+
+#include "orte/runtime/orte_cr.h"
+#include "orte/mca/ess/ess.h"
+#include "orte/mca/ess/base/base.h"
+#include "orte/mca/ess/env/ess_env.h"
+
+static int env_set_name(void);
+
+static int rte_init(char flags);
+static int rte_finalize(void);
+
+
+/* Function table for the "env" ess module: init, finalize, abort. */
+orte_ess_base_module_t orte_ess_env_module = {
+    rte_init,
+    rte_finalize,
+    orte_ess_base_app_abort
+};
+
+
+/*
+ * Initialize the RTE for a process whose identity arrives via MCA params.
+ *
+ * Sets the process name, then runs the orted, tool or application setup
+ * routine according to the orte_process_info.daemon / .tool flags.
+ * The "flags" argument is not consulted here.
+ *
+ * Returns ORTE_SUCCESS, or the failing setup routine's error code after
+ * reporting it through opal_show_help().
+ */
+static int rte_init(char flags)
+{
+    int ret;
+    char *error = NULL;
+
+    /* Start by getting a unique name from the environment */
+    /* NOTE(review): the return code is not checked, so setup continues
+     * even if the name could not be set - confirm this is intended */
+    env_set_name();
+
+    /* if I am a daemon, complete my setup using the
+     * default procedure
+     */
+    if (orte_process_info.daemon) {
+        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_ess_base_orted_setup";
+            goto error;
+        }
+
+    } else if (orte_process_info.tool) {
+        /* otherwise, if I am a tool proc, use that procedure */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_ess_base_tool_setup";
+            goto error;
+        }
+
+    } else {
+        /* otherwise, I must be an application process, so
+         * use that default procedure
+         */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_ess_base_app_setup";
+            goto error;
+        }
+
+    }
+
+    return ORTE_SUCCESS;
+
+error:
+    opal_show_help("help-orte-runtime.txt",
+                   "orte_init:startup:internal-failure",
+                   true, error, ORTE_ERROR_NAME(ret), ret);
+
+    return ret;
+}
+
+/*
+ * Tear down the RTE using the finalize routine that matches the setup
+ * routine chosen in rte_init (orted, tool or application).
+ *
+ * A failure is logged and its code returned; otherwise the routine's
+ * success code is returned.
+ */
+static int rte_finalize(void)
+{
+    int ret;
+
+    /* if I am a daemon, finalize using the default procedure */
+    if (orte_process_info.daemon) {
+        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
+            ORTE_ERROR_LOG(ret);
+        }
+    } else if (orte_process_info.tool) {
+        /* otherwise, if I am a tool proc, use that procedure */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
+            ORTE_ERROR_LOG(ret);
+        }
+    } else {
+        /* otherwise, I must be an application process, so
+         * use that default procedure
+         */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
+            ORTE_ERROR_LOG(ret);
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Set ORTE_PROC_MY_NAME from the "orte_ess_jobid" and "orte_ess_vpid"
+ * MCA string parameters, then pull in the remaining common environment
+ * values via orte_ess_env_get().
+ *
+ * Returns ORTE_SUCCESS; ORTE_ERR_NOT_FOUND when either parameter is
+ * unset; or the error code of the failing conversion / env lookup.
+ * Each looked-up string is released on every path once it has been
+ * parsed, including when the conversion fails.
+ */
+static int env_set_name(void)
+{
+    char *jobid_str, *procid_str;
+    int id, rc;
+    orte_jobid_t jobid;
+    orte_vpid_t vpid;
+
+    id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL);
+    mca_base_param_lookup_string(id, &jobid_str);
+    if (NULL == jobid_str) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        return ORTE_ERR_NOT_FOUND;
+    }
+    rc = orte_util_convert_string_to_jobid(&jobid, jobid_str);
+    /* release the string before testing rc so the error path cannot leak it */
+    free(jobid_str);
+    if (ORTE_SUCCESS != rc) {
+        ORTE_ERROR_LOG(rc);
+        return(rc);
+    }
+
+    id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL);
+    mca_base_param_lookup_string(id, &procid_str);
+    if (NULL == procid_str) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        return ORTE_ERR_NOT_FOUND;
+    }
+    rc = orte_util_convert_string_to_vpid(&vpid, procid_str);
+    /* same ownership rule as for the jobid string above */
+    free(procid_str);
+    if (ORTE_SUCCESS != rc) {
+        ORTE_ERROR_LOG(rc);
+        return(rc);
+    }
+
+    ORTE_PROC_MY_NAME->jobid = jobid;
+    ORTE_PROC_MY_NAME->vpid = vpid;
+
+    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
+                         "ess:env set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* get the non-name common environmental variables */
+    if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    return ORTE_SUCCESS;
+}
+
diff --git a/orte/mca/ess/ess.h b/orte/mca/ess/ess.h
new file mode 100644
index 0000000000..7522020163
--- /dev/null
+++ b/orte/mca/ess/ess.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + * The OpenRTE Environment-Specific Services + * + */ + +#ifndef ORTE_ESS_H +#define ORTE_ESS_H + +#include "opal/mca/mca.h" + +BEGIN_C_DECLS + +/* + * Module and component structures + */ +struct orte_ess_base_module_1_0_0_t; +typedef struct orte_ess_base_module_1_0_0_t orte_ess_base_module_1_0_0_t; +typedef orte_ess_base_module_1_0_0_t orte_ess_base_module_t; + +struct orte_ess_base_component_1_0_0_t; +typedef struct orte_ess_base_component_1_0_0_t orte_ess_base_component_1_0_0_t; +typedef orte_ess_base_component_1_0_0_t orte_ess_base_component_t; + +/** + * Selection function + */ +typedef orte_ess_base_module_t* +(*orte_ess_base_component_init_fn_t)(int *priority); + +/* + * API functions + */ + +/* + * Initialize the RTE for this environment + */ +typedef int (*orte_ess_base_module_init_fn_t)(char flags); + +/* + * Finalize the RTE for this environment + */ +typedef int (*orte_ess_base_module_finalize_fn_t)(void); + +/** + * Abort the current application + * + * Aborts currently running application, NOTE: We do NOT call the + * regular C-library "abort" function, even + * though that would have alerted us to the fact that this is + * an abnormal termination, because it would automatically cause + * a core file to be generated. The "report" flag indicates if the + * function should create an appropriate file to alert the local + * orted that termination was abnormal. 
+ */ +typedef void (*orte_ess_base_module_abort_fn_t)(int status, bool report); + + +/* + * the standard module data structure + */ +struct orte_ess_base_module_1_0_0_t { + orte_ess_base_module_init_fn_t init; + orte_ess_base_module_finalize_fn_t finalize; + orte_ess_base_module_abort_fn_t abort; +}; + + +/* + * the standard component data structure + */ +struct orte_ess_base_component_1_0_0_t { + mca_base_component_t ess_version; + mca_base_component_data_1_0_0_t ess_data; + orte_ess_base_component_init_fn_t ess_init; +}; + +/* + * Macro for use in components that are of type ess v1.0.0 + */ +#define ORTE_ESS_BASE_VERSION_1_0_0 \ + /* ess v1.0 is chained to MCA v1.0 */ \ + MCA_BASE_VERSION_1_0_0, \ + /* ess v1.0 */ \ + "ess", 1, 0, 0 + +/* Global structure for accessing ESS functions */ +ORTE_DECLSPEC extern orte_ess_base_module_t orte_ess; /* holds selected module's function pointers */ + +END_C_DECLS + +#endif diff --git a/orte/mca/sds/env/Makefile.am b/orte/mca/ess/hnp/Makefile.am similarity index 75% rename from orte/mca/sds/env/Makefile.am rename to orte/mca/ess/hnp/Makefile.am index c3ec6879f1..4cf3a2c567 100644 --- a/orte/mca/sds/env/Makefile.am +++ b/orte/mca/ess/hnp/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - sds_env.h \ - sds_env_component.c \ - sds_env_module.c + ess_hnp.h \ + ess_hnp_component.c \ + ess_hnp_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_sds_env_DSO +if OMPI_BUILD_ess_hnp_DSO component_noinst = -component_install = mca_sds_env.la +component_install = mca_ess_hnp.la else -component_noinst = libmca_sds_env.la +component_noinst = libmca_ess_hnp.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sds_env_la_SOURCES = $(sources) -mca_sds_env_la_LDFLAGS = -module -avoid-version +mca_ess_hnp_la_SOURCES = $(sources) +mca_ess_hnp_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_sds_env_la_SOURCES =$(sources) -libmca_sds_env_la_LDFLAGS = -module -avoid-version +libmca_ess_hnp_la_SOURCES =$(sources) +libmca_ess_hnp_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/ess/hnp/configure.m4 b/orte/mca/ess/hnp/configure.m4 new file mode 100644 index 0000000000..56c49a21e1 --- /dev/null +++ b/orte/mca/ess/hnp/configure.m4 @@ -0,0 +1,13 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Sandia National Laboratories. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ess_hnp_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_ess_hnp_CONFIG], [$1]) diff --git a/orte/mca/ns/proxy/configure.params b/orte/mca/ess/hnp/configure.params similarity index 89% rename from orte/mca/ns/proxy/configure.params rename to orte/mca/ess/hnp/configure.params index 3513f8d956..c410dd1eaf 100644 --- a/orte/mca/ns/proxy/configure.params +++ b/orte/mca/ess/hnp/configure.params @@ -19,6 +19,9 @@ # $HEADER$ # -# Specific to this module - PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that we build +# whenever someone using daemons does + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/ess/hnp/ess_hnp.h b/orte/mca/ess/hnp/ess_hnp.h new file mode 100644 index 0000000000..e086fe7b78 --- /dev/null +++ b/orte/mca/ess/hnp/ess_hnp.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_ESS_HNP_H +#define ORTE_ESS_HNP_H + +BEGIN_C_DECLS + +/* + * Module open / close + */ +int orte_ess_hnp_component_open(void); +int orte_ess_hnp_component_close(void); +orte_ess_base_module_t* orte_ess_hnp_component_init(int *priority); + + +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_hnp_component; + +END_C_DECLS + +#endif /* ORTE_ESS_HNP_H */ diff --git a/orte/mca/sds/seed/sds_seed_component.c b/orte/mca/ess/hnp/ess_hnp_component.c similarity index 68% rename from orte/mca/sds/seed/sds_seed_component.c rename to orte/mca/ess/hnp/ess_hnp_component.c index 950a268411..c009323afe 100644 --- a/orte/mca/sds/seed/sds_seed_component.c +++ b/orte/mca/ess/hnp/ess_hnp_component.c @@ -23,36 +23,36 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/seed/sds_seed.h" #include "opal/mca/base/mca_base_param.h" + #include "orte/util/proc_info.h" -extern orte_sds_base_module_t orte_sds_seed_module; +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/hnp/ess_hnp.h" + +extern orte_ess_base_module_t orte_ess_hnp_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_seed_component = { - /* First, the mca_component_t struct containing meta information - about the component itself */ +orte_ess_base_component_t mca_ess_hnp_component = { { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ - "seed", + "hnp", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_seed_component_open, - orte_sds_seed_component_close + 
orte_ess_hnp_component_open, + orte_ess_hnp_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -62,29 +62,37 @@ orte_sds_base_component_t mca_sds_seed_component = { }, /* Initialization / querying functions */ - orte_sds_seed_component_init + orte_ess_hnp_component_init }; int -orte_sds_seed_component_open(void) +orte_ess_hnp_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_seed_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_hnp_component_init(int *priority) { - if (orte_process_info.seed == false) return NULL; - *priority = 40; - return &orte_sds_seed_module; + /* we are the hnp module - we need to be selected + * IFF we are designated as the hnp + */ + if (orte_process_info.hnp) { + *priority = 100; + return &orte_ess_hnp_module; + } + + /* else, we are not */ + *priority = -1; + return NULL; } int -orte_sds_seed_component_close(void) +orte_ess_hnp_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c new file mode 100644 index 0000000000..2b236bfbfa --- /dev/null +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+/* NOTE(review): the header names of the two unguarded #include lines below
+ * were lost when this patch text was extracted; restore them from the
+ * upstream file before applying.
+ */
+#include
+#include
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "opal/event/event.h"
+#include "opal/util/output.h"
+#include "opal/util/show_help.h"
+#include "opal/runtime/opal.h"
+#include "opal/runtime/opal_cr.h"
+
+#include "opal/util/os_path.h"
+#include "opal/util/malloc.h"
+
+#include "orte/mca/rml/base/base.h"
+#include "orte/mca/rml/base/rml_contact.h"
+#include "orte/mca/routed/base/base.h"
+#include "orte/mca/routed/routed.h"
+#include "orte/mca/errmgr/base/base.h"
+#include "orte/mca/grpcomm/base/base.h"
+#include "orte/mca/iof/base/base.h"
+#include "orte/mca/ras/base/base.h"
+#include "orte/mca/plm/base/base.h"
+#include "orte/mca/odls/base/base.h"
+
+#include "orte/mca/rmaps/base/base.h"
+#if OPAL_ENABLE_FT == 1
+#include "orte/mca/snapc/base/base.h"
+#endif
+#include "orte/mca/filem/base/base.h"
+#include "orte/util/proc_info.h"
+#include "orte/util/session_dir.h"
+#include "orte/util/sys_info.h"
+#include "orte/util/hnp_contact.h"
+#include "orte/util/name_fns.h"
+
+#include "orte/runtime/runtime.h"
+#include "orte/runtime/orte_wait.h"
+#include "orte/runtime/orte_globals.h"
+
+#include "orte/runtime/orte_cr.h"
+#include "orte/mca/ess/ess.h"
+#include "orte/mca/ess/base/base.h"
+#include "orte/mca/ess/hnp/ess_hnp.h"
+
+static int rte_init(char flags);
+static int rte_finalize(void);
+static void rte_abort(int status, bool report) __opal_attribute_noreturn__;
+
+
+/* ESS module table for the HNP: init, finalize and abort entry points. */
+orte_ess_base_module_t orte_ess_hnp_module = {
+    rte_init,
+    rte_finalize,
+    rte_abort
+};
+
+
+/*
+ * Bring up the HNP runtime.
+ *
+ * Opens and selects, in order, the PLM, RML, routed, grpcomm, RAS, RMAPS,
+ * errmgr and ODLS frameworks, creates the session directory, writes the
+ * contact file, records the job/node/proc objects describing this process,
+ * and finally opens IOF, FileM, (optionally) SnapC and the CR support.
+ *
+ * flags is not read by this function.
+ *
+ * Returns ORTE_SUCCESS, or the error code of the first step that failed
+ * after reporting that step through opal_show_help().  Frameworks that
+ * were already opened are not closed on the failure path.
+ */
+static int rte_init(char flags)
+{
+    int ret;
+    char *error = NULL;
+    char *jobid_str = NULL, *procid_str = NULL, *contact_path;
+    orte_job_t *jdata;
+    orte_node_t *node;
+    orte_proc_t *proc;
+    orte_std_cntr_t index;
+
+    /* Since we are the HNP, then responsibility for
+     * defining the name falls to the PLM component for our
+     * respective environment - hence, we have to open the PLM
+     * first and select that component. Note that ONLY the
+     * HNP ever uses a PLM component anyway
+     */
+    if (ORTE_SUCCESS != (ret = orte_plm_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_plm_base_open";
+        goto error;
+    }
+
+    if (ORTE_SUCCESS != (ret = orte_plm_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_plm_base_select";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_plm_set_hnp_name";
+        goto error;
+    }
+
+    /* Setup the communication infrastructure */
+    /*
+     * Runtime Messaging Layer
+     */
+    if (ORTE_SUCCESS != (ret = orte_rml_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rml_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rml_base_select";
+        goto error;
+    }
+    /*
+     * Routed system
+     */
+    if (ORTE_SUCCESS != (ret = orte_routed_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_routed_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_routed_base_select";
+        goto error;
+    }
+    /*
+     * Group communications
+     */
+    if (ORTE_SUCCESS != (ret = orte_grpcomm_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_grpcomm_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_grpcomm_base_select";
+        goto error;
+    }
+
+    /* Now provide a chance for the PLM
+     * to perform any module-specific init functions. This
+     * needs to occur AFTER the communications are setup
+     * as it may involve starting a non-blocking recv
+     */
+    if (ORTE_SUCCESS != (ret = orte_plm.init())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_plm_init";
+        goto error;
+    }
+
+    /*
+     * Setup the remaining resource
+     * management and errmgr frameworks - application procs
+     * and daemons do not open these frameworks as they only use
+     * the hnp proxy support in the PLM framework.
+     */
+    if (ORTE_SUCCESS != (ret = orte_ras_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_ras_base_open";
+        goto error;
+    }
+
+    if (ORTE_SUCCESS != (ret = orte_ras_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        /* label names the call that actually failed */
+        error = "orte_ras_base_select";
+        goto error;
+    }
+
+    if (ORTE_SUCCESS != (ret = orte_rmaps_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rmaps_base_open";
+        goto error;
+    }
+
+    if (ORTE_SUCCESS != (ret = orte_rmaps_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        /* label names the call that actually failed */
+        error = "orte_rmaps_base_select";
+        goto error;
+    }
+
+    if (ORTE_SUCCESS != (ret = orte_errmgr_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_errmgr_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_errmgr_base_select";
+        goto error;
+    }
+
+    /* Open/select the odls */
+    if (ORTE_SUCCESS != (ret = orte_odls_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_odls_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_odls_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_odls_base_select";
+        goto error;
+    }
+
+    /* enable communication with the rml */
+    if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rml.enable_comm";
+        goto error;
+    }
+
+    /* setup my session directory */
+    if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobid_str, ORTE_PROC_MY_NAME->jobid))) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_convert_jobid_to_string";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_util_convert_vpid_to_string(&procid_str, ORTE_PROC_MY_NAME->vpid))) {
+        /* the jobid string was already produced above; do not leak it */
+        free(jobid_str);
+        ORTE_ERROR_LOG(ret);
+        error = "orte_convert_vpid_to_string";
+        goto error;
+    }
+
+    OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
+                         "%s setting up session dir with\n\ttmpdir: %s\n\tuser %s\n\thost %s\n\tjobid %s\n\tprocid %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
+                         orte_system_info.user, orte_system_info.nodename, jobid_str, procid_str));
+
+    ret = orte_session_dir(true,
+                           orte_process_info.tmpdir_base,
+                           orte_system_info.user,
+                           orte_system_info.nodename, NULL,
+                           jobid_str, procid_str);
+    /* the name strings are only needed for the call above */
+    free(jobid_str);
+    free(procid_str);
+    if (ORTE_SUCCESS != ret) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_session_dir";
+        goto error;
+    }
+
+    /* Once the session directory location has been established, set
+       the opal_output hnp file location to be in the
+       proc-specific session directory. */
+    opal_output_set_output_file_info(orte_process_info.proc_session_dir,
+                                     "output-", NULL, NULL);
+
+    /* save my contact info in a file for others to find */
+    contact_path = opal_os_path(false, orte_process_info.job_session_dir,
+                                "contact.txt", NULL);
+
+    OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
+                         "%s writing contact file %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         contact_path));
+
+    /* a failure to write the contact file is reported but is not fatal */
+    if (ORTE_SUCCESS != (ret = orte_write_hnp_contact_file(contact_path))) {
+        OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
+                             "%s writing contact file failed with error %s",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                             ORTE_ERROR_NAME(ret)));
+    } else {
+        OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
+                             "%s wrote contact file",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+    }
+    free(contact_path);
+
+    /* Setup the job data object for the daemons */
+    /* create and store the job data object */
+    jdata = OBJ_NEW(orte_job_t);
+    jdata->jobid = ORTE_PROC_MY_NAME->jobid;
+    orte_pointer_array_add(&index, orte_job_data, jdata);
+
+    /* create and store a node object where we are */
+    node = OBJ_NEW(orte_node_t);
+    node->name = strdup(orte_system_info.nodename);
+    orte_pointer_array_add(&node->index, orte_node_pool, node);
+
+    /* create and store a proc object for us */
+    proc = OBJ_NEW(orte_proc_t);
+    proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
+    proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
+    proc->pid = orte_process_info.pid;
+    proc->rml_uri = orte_rml.get_contact_info();
+    proc->state = ORTE_PROC_STATE_RUNNING;
+    OBJ_RETAIN(node);  /* keep accounting straight */
+    proc->node = node;
+    orte_pointer_array_add(&index, jdata->procs, proc);
+
+    /* record that the daemon (i.e., us) is on this node
+     * NOTE: we do not add the proc object to the node's
+     * proc array because we are not an application proc.
+     * Instead, we record it in the daemon field of the
+     * node object
+     */
+    OBJ_RETAIN(proc);  /* keep accounting straight */
+    node->daemon = proc;
+    node->daemon_launched = true;
+    node->state = ORTE_NODE_STATE_UP;
+
+    /* record that the daemon job is running */
+    jdata->num_procs = 1;
+    jdata->state = ORTE_JOB_STATE_RUNNING;
+
+    /* setup the routed info - the selected routed component
+     * will know what to do.
+     */
+    if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_routed.init_routes";
+        goto error;
+    }
+
+    /*
+     * setup I/O forwarding system - must come after we init routes */
+    if (ORTE_SUCCESS != (ret = orte_iof_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_iof_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_iof_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_iof_base_select";
+        goto error;
+    }
+
+    /* setup the FileM */
+    if (ORTE_SUCCESS != (ret = orte_filem_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_filem_base_open";
+        goto error;
+    }
+
+    if (ORTE_SUCCESS != (ret = orte_filem_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_filem_base_select";
+        goto error;
+    }
+
+#if OPAL_ENABLE_FT == 1
+    /*
+     * Setup the SnapC
+     */
+    if (ORTE_SUCCESS != (ret = orte_snapc_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_snapc_base_open";
+        goto error;
+    }
+
+    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_snapc_base_select";
+        goto error;
+    }
+
+    /* For HNP, ORTE doesn't need the OPAL CR stuff */
+    opal_cr_set_enabled(false);
+#else
+    opal_cr_set_enabled(false);
+#endif
+
+    /*
+     * Initialize the CR setup
+     * Note: Always do this, even in non-FT builds.
+     * If we don't some user level tools may hang.
+     */
+    if (ORTE_SUCCESS != (ret = orte_cr_init())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_cr_init";
+        goto error;
+    }
+
+    return ORTE_SUCCESS;
+
+error:
+    opal_show_help("help-orte-runtime.txt",
+                   "orte_init:startup:internal-failure",
+                   true, error, ORTE_ERROR_NAME(ret), ret);
+
+    return ret;
+}
+
+/*
+ * Tear down the HNP runtime: remove the contact file, close the
+ * frameworks opened by rte_init(), remove the session directory and
+ * release the global info structures.  Always returns ORTE_SUCCESS.
+ */
+static int rte_finalize(void)
+{
+    char *contact_path;
+
+    /* remove my contact info file - it was written under the job
+     * session directory in rte_init(), so it is removed from there
+     */
+    contact_path = opal_os_path(false, orte_process_info.job_session_dir,
+                                "contact.txt", NULL);
+    if (NULL != contact_path) {
+        unlink(contact_path);
+        free(contact_path);
+    }
+
+    orte_cr_finalize();
+
+#if OPAL_ENABLE_FT == 1
+    orte_snapc_base_close();
+#endif
+    orte_filem_base_close();
+
+    orte_odls_base_close();
+
+    orte_wait_finalize();
+    orte_iof_base_close();
+
+    /* finalize selected modules so they can de-register
+     * any receives
+     */
+    orte_ras_base_close();
+    orte_rmaps_base_close();
+    orte_plm_base_close();
+    orte_errmgr_base_close();
+
+    /* now can close the rml and its friendly group comm */
+    orte_grpcomm_base_close();
+    orte_routed_base_close();
+    orte_rml_base_close();
+
+    orte_session_dir_finalize(ORTE_PROC_MY_NAME);
+
+    /* clean out the global structures */
+    orte_sys_info_finalize();
+    orte_proc_info_finalize();
+
+    return ORTE_SUCCESS;
+}
+
+/*
+ * For application procs, we do NOT call the regular
+ * C-library "abort" function, even though that would have
+ * alerted us to the fact that this is an abnormal termination,
+ * because it would automatically cause a core file to be
+ * generated. On large systems, that can be overwhelming
+ * (imagine a few thousand Gbyte-sized files hitting
+ * a shared file system simultaneously...ouch!).
+ *
+ * However, the HNP is only ONE process, so we can do it
+ * here as the core file might prove useful.
+ *
+ * Neither status nor report is read here; the process always
+ * terminates through abort().
+ */
+static void rte_abort(int status, bool report)
+{
+    /* do NOT do a normal finalize as this will very likely
+     * hang the process. We are aborting due to an abnormal condition
+     * that precludes normal cleanup
+     *
+     * We do need to do the following bits to make sure we leave a
+     * clean environment. Taken from orte_finalize():
+     * - Assume errmgr cleans up child processes before we exit.
+     */
+
+    /* CRS cleanup since it may have a named pipe and thread active */
+    orte_cr_finalize();
+
+    /* - Clean out the global structures
+     *   (not really necessary, but good practice)
+     */
+    orte_sys_info_finalize();
+    orte_proc_info_finalize();
+
+    /* Now abort */
+    abort();
+}
+
diff --git a/orte/mca/sds/lsf/Makefile.am b/orte/mca/ess/lsf/Makefile.am similarity index 71% rename from orte/mca/sds/lsf/Makefile.am rename to orte/mca/ess/lsf/Makefile.am index faba825c09..289fedf4d3 100644 --- a/orte/mca/sds/lsf/Makefile.am +++ b/orte/mca/ess/lsf/Makefile.am @@ -20,29 +20,29 @@ AM_CPPFLAGS = $(pls_lsf_CPPFLAGS) sources = \ - sds_lsf.h \ - sds_lsf_component.c \ - sds_lsf_module.c + ess_lsf.h \ + ess_lsf_component.c \ + ess_lsf_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds).
-if OMPI_BUILD_sds_lsf_DSO +if OMPI_BUILD_ess_lsf_DSO component_noinst = -component_install = mca_sds_lsf.la +component_install = mca_ess_lsf.la else -component_noinst = libmca_sds_lsf.la +component_noinst = libmca_ess_lsf.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sds_lsf_la_SOURCES = $(sources) -mca_sds_lsf_la_LDFLAGS = -module -avoid-version $(sds_lsf_LDFLAGS) -mca_sds_lsf_la_LIBADD = $(sds_lsf_LIBS) +mca_ess_lsf_la_SOURCES = $(sources) +mca_ess_lsf_la_LDFLAGS = -module -avoid-version $(ess_lsf_LDFLAGS) +mca_ess_lsf_la_LIBADD = $(ess_lsf_LIBS) noinst_LTLIBRARIES = $(component_noinst) -libmca_sds_lsf_la_SOURCES =$(sources) -libmca_sds_lsf_la_LDFLAGS = -module -avoid-version $(sds_lsf_LDFLAGS) -libmca_sds_lsf_la_LIBADD = $(sds_lsf_LIBS) +libmca_ess_lsf_la_SOURCES =$(sources) +libmca_ess_lsf_la_LDFLAGS = -module -avoid-version $(ess_lsf_LDFLAGS) +libmca_ess_lsf_la_LIBADD = $(ess_lsf_LIBS) diff --git a/orte/mca/sds/lsf/configure.m4 b/orte/mca/ess/lsf/configure.m4 similarity index 71% rename from orte/mca/sds/lsf/configure.m4 rename to orte/mca/ess/lsf/configure.m4 index 9f432c93ab..48bcae15a0 100644 --- a/orte/mca/sds/lsf/configure.m4 +++ b/orte/mca/ess/lsf/configure.m4 @@ -18,21 +18,21 @@ # $HEADER$ # -# MCA_sds_lsf_CONFIG([action-if-found], [action-if-not-found]) +# MCA_ess_lsf_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_sds_lsf_CONFIG],[ - OMPI_CHECK_LSF([sds_lsf], [sds_lsf_good=1], [sds_lsf_good=0]) +AC_DEFUN([MCA_ess_lsf_CONFIG],[ + OMPI_CHECK_LSF([ess_lsf], [ess_lsf_good=1], [ess_lsf_good=0]) # if check worked, set wrapper flags if so. 
# Evaluate succeed / fail - AS_IF([test "$sds_lsf_good" = "1"], - [sds_lsf_WRAPPER_EXTRA_LDFLAGS="$sds_lsf_LDFLAGS" - sds_lsf_WRAPPER_EXTRA_LIBS="$sds_lsf_LIBS" + AS_IF([test "$ess_lsf_good" = "1"], + [ess_lsf_WRAPPER_EXTRA_LDFLAGS="$ess_lsf_LDFLAGS" + ess_lsf_WRAPPER_EXTRA_LIBS="$ess_lsf_LIBS" $1], [$2]) # set build flags to use in makefile - AC_SUBST([sds_lsf_CPPFLAGS]) - AC_SUBST([sds_lsf_LDFLAGS]) - AC_SUBST([sds_lsf_LIBS]) + AC_SUBST([ess_lsf_CPPFLAGS]) + AC_SUBST([ess_lsf_LDFLAGS]) + AC_SUBST([ess_lsf_LIBS]) ])dnl diff --git a/orte/mca/ess/lsf/configure.params b/orte/mca/ess/lsf/configure.params new file mode 100644 index 0000000000..0727eefc3f --- /dev/null +++ b/orte/mca/ess/lsf/configure.params @@ -0,0 +1,27 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that, if we can build, +# all the LSF and supporting components will build + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/sds/lsf/sds_lsf.h b/orte/mca/ess/lsf/ess_lsf.h similarity index 68% rename from orte/mca/sds/lsf/sds_lsf.h rename to orte/mca/ess/lsf/ess_lsf.h index b680f71580..e660adb0d5 100644 --- a/orte/mca/sds/lsf/sds_lsf.h +++ b/orte/mca/ess/lsf/ess_lsf.h @@ -17,30 +17,20 @@ * $HEADER$ */ -#ifndef ORTE_SDS_LSF_H -#define ORTE_SDS_LSF_H +#ifndef ORTE_ESS_LSF_H +#define ORTE_ESS_LSF_H BEGIN_C_DECLS -ORTE_MODULE_DECLSPEC extern orte_sds_base_component_t mca_sds_lsf_component; +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_lsf_component; /* * Module open / close */ -int orte_sds_lsf_component_open(void); -int orte_sds_lsf_component_close(void); -orte_sds_base_module_t* orte_sds_lsf_component_init(int *priority); - -/* - * Startup / Shutdown - */ -int orte_sds_lsf_finalize(void); - -/* - * Module functions - */ -int orte_sds_lsf_set_name(void); +int orte_ess_lsf_component_open(void); +int orte_ess_lsf_component_close(void); +orte_ess_base_module_t* orte_ess_lsf_component_init(int *priority); END_C_DECLS -#endif /* ORTE_SDS_LSF_H */ +#endif /* ORTE_ESS_LSF_H */ diff --git a/orte/mca/sds/lsf/sds_lsf_component.c b/orte/mca/ess/lsf/ess_lsf_component.c similarity index 61% rename from orte/mca/sds/lsf/sds_lsf_component.c rename to orte/mca/ess/lsf/ess_lsf_component.c index 3d3facd99f..d376da9271 100644 --- a/orte/mca/sds/lsf/sds_lsf_component.c +++ b/orte/mca/ess/lsf/ess_lsf_component.c @@ -18,25 +18,28 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/lsf/sds_lsf.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_lsf_module; +#include "orte/util/proc_info.h" + +#include 
"orte/mca/ess/ess.h" +#include "orte/mca/ess/lsf/ess_lsf.h" + +extern orte_ess_base_module_t orte_ess_lsf_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_lsf_component = { +orte_ess_base_component_t mca_ess_lsf_component = { { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ "lsf", @@ -45,8 +48,8 @@ orte_sds_base_component_t mca_sds_lsf_component = { ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_lsf_component_open, - orte_sds_lsf_component_close + orte_ess_lsf_component_open, + orte_ess_lsf_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -56,40 +59,36 @@ orte_sds_base_component_t mca_sds_lsf_component = { }, /* Initialization / querying functions */ - orte_sds_lsf_component_init + orte_ess_lsf_component_init }; -int orte_sds_lsf_component_open(void) +int orte_ess_lsf_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t *orte_sds_lsf_component_init(int *priority) +orte_ess_base_module_t *orte_ess_lsf_component_init(int *priority) { - int id; - char *mode; - - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - mca_base_param_lookup_string(id, &mode); - - if (NULL == mode || 0 != strcmp("lsf", mode)) { - if (NULL != mode) { - free(mode); - } - return NULL; + /* Are we running under an LSF job? Were + * we given a path back to the HNP? 
If the + * answer to both is "yes", then we were launched + * by mpirun in an LSF world + */ + + if (NULL != getenv("LSB_JOBID") && + NULL != orte_process_info.my_hnp_uri) { + *priority = 40; + return &orte_ess_lsf_module; } - - if (NULL != mode) { - free(mode); - } - *priority = 20; - return &orte_sds_lsf_module; + + /* nope, not here */ + return NULL; } -int orte_sds_lsf_component_close(void) +int orte_ess_lsf_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c new file mode 100644 index 0000000000..051a7a9cbc --- /dev/null +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif  /* HAVE_UNISTD_H */
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif  /* HAVE_STRING_H */
+/* NOTE(review): the header names of the two unguarded #include lines below
+ * were lost when this patch text was extracted; restore them from the
+ * upstream file before applying.
+ */
+#include
+
+#include
+
+#include "opal/util/argv.h"
+#include "opal/util/opal_environ.h"
+
+#include "orte/util/name_fns.h"
+#include "orte/runtime/orte_globals.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/util/sys_info.h"
+
+#include "orte/mca/ess/ess.h"
+#include "orte/mca/ess/base/base.h"
+#include "orte/mca/ess/lsf/ess_lsf.h"
+
+static int lsf_set_name(void);
+
+static int rte_init(char flags);
+static int rte_finalize(void);
+
+/* ESS module table for procs launched under LSF; abort uses the
+ * shared application-proc implementation.
+ */
+orte_ess_base_module_t orte_ess_lsf_module = {
+    rte_init,
+    rte_finalize,
+    orte_ess_base_app_abort
+};
+
+/*
+ * Establish this process's name from the LSF environment, then run the
+ * base setup matching the process type (daemon, tool or application).
+ *
+ * flags is not read by this function.
+ *
+ * Returns ORTE_SUCCESS, or the error code of the step that failed after
+ * reporting it through opal_show_help().
+ */
+static int rte_init(char flags)
+{
+    int ret;
+    char *error = NULL;
+
+    /* Start by getting a unique name - nothing below can work without it */
+    if (ORTE_SUCCESS != (ret = lsf_set_name())) {
+        ORTE_ERROR_LOG(ret);
+        error = "lsf_set_name";
+        goto error;
+    }
+
+    /* if I am a daemon, complete my setup using the
+     * default procedure
+     */
+    if (orte_process_info.daemon) {
+        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_ess_base_orted_setup";
+            goto error;
+        }
+    } else if (orte_process_info.tool) {
+        /* otherwise, if I am a tool proc, use that procedure */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_ess_base_tool_setup";
+            goto error;
+        }
+    } else {
+        /* otherwise, I must be an application process, so
+         * use that default procedure
+         */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_ess_base_app_setup";
+            goto error;
+        }
+    }
+
+    return ORTE_SUCCESS;
+
+error:
+    opal_show_help("help-orte-runtime.txt",
+                   "orte_init:startup:internal-failure",
+                   true, error, ORTE_ERROR_NAME(ret), ret);
+
+    return ret;
+}
+
+/*
+ * Run the base finalize routine matching the process type and return
+ * its result; a failure is logged but otherwise passed straight back.
+ */
+static int rte_finalize(void)
+{
+    int ret;
+
+    /* if I am a daemon, finalize using the default procedure */
+    if (orte_process_info.daemon) {
+        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
+            ORTE_ERROR_LOG(ret);
+        }
+    } else if (orte_process_info.tool) {
+        /* otherwise, if I am a tool proc, use that procedure */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
+            ORTE_ERROR_LOG(ret);
+        }
+    } else {
+        /* otherwise, I must be an application process, so
+         * use that default procedure
+         */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
+            ORTE_ERROR_LOG(ret);
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Fill in ORTE_PROC_MY_NAME.
+ *
+ * The base name comes either from the "orte_ess_name" MCA parameter or,
+ * when that is unset, from the separate "orte_ess_jobid" and
+ * "orte_ess_vpid" parameters.  The vpid is then replaced by the value of
+ * the LSF_PM_TASKID environment variable, and the remaining common
+ * environment values are read through orte_ess_env_get().
+ *
+ * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when a required parameter or
+ * LSF_PM_TASKID is missing, or the error code of a failed conversion.
+ */
+static int lsf_set_name(void)
+{
+    int rc;
+    int id;
+    char *name_string = NULL;
+    char *taskid_string;
+    int lsf_nodeid;
+
+    /* start by getting our jobid, and vpid (which is the
+       starting vpid for the list of daemons) */
+    id = mca_base_param_register_string("orte", "ess", "name", NULL, NULL);
+    mca_base_param_lookup_string(id, &name_string);
+
+    if (name_string != NULL) {
+        /* NOTE(review): ORTE_PROC_MY_NAME is used with "->" everywhere else
+         * in this file, so taking its address here looks like one level of
+         * indirection too many - confirm against the prototype of
+         * orte_util_convert_string_to_process_name().
+         */
+        if (ORTE_SUCCESS !=
+            (rc = orte_util_convert_string_to_process_name(&ORTE_PROC_MY_NAME, name_string))) {
+            ORTE_ERROR_LOG(rc);
+            free(name_string);
+            return rc;
+        }
+        free(name_string);
+    } else {
+        orte_jobid_t jobid;
+        orte_vpid_t vpid;
+        char *jobid_string = NULL;
+        char *vpid_string = NULL;
+
+        id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL);
+        mca_base_param_lookup_string(id, &jobid_string);
+        if (NULL == jobid_string) {
+            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+            return ORTE_ERR_NOT_FOUND;
+        }
+        rc = orte_util_convert_string_to_jobid(&jobid, jobid_string);
+        /* looked-up strings belong to us, same as name_string above */
+        free(jobid_string);
+        if (ORTE_SUCCESS != rc) {
+            ORTE_ERROR_LOG(rc);
+            return(rc);
+        }
+
+        id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL);
+        mca_base_param_lookup_string(id, &vpid_string);
+        if (NULL == vpid_string) {
+            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+            return ORTE_ERR_NOT_FOUND;
+        }
+        rc = orte_util_convert_string_to_vpid(&vpid, vpid_string);
+        free(vpid_string);
+        if (ORTE_SUCCESS != rc) {
+            ORTE_ERROR_LOG(rc);
+            return(rc);
+        }
+
+        /* store BOTH halves of the name that was just parsed */
+        ORTE_PROC_MY_NAME->jobid = jobid;
+        ORTE_PROC_MY_NAME->vpid = vpid;
+    }
+
+    /* fix up the base name and make it the "real" name */
+    taskid_string = getenv("LSF_PM_TASKID");
+    if (NULL == taskid_string) {
+        /* without the LSF task id there is no way to derive our vpid */
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        return ORTE_ERR_NOT_FOUND;
+    }
+    lsf_nodeid = atoi(taskid_string);
+    ORTE_PROC_MY_NAME->vpid = lsf_nodeid;
+
+    /* get the non-name common environmental variables */
+    if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/sds/portals_utcp/Makefile.am b/orte/mca/ess/portals_utcp/Makefile.am similarity index 64% rename from orte/mca/sds/portals_utcp/Makefile.am rename to orte/mca/ess/portals_utcp/Makefile.am index 39a47cb167..515aa3dc39 100644 --- a/orte/mca/sds/portals_utcp/Makefile.am +++ b/orte/mca/ess/portals_utcp/Makefile.am @@ -16,32 +16,32 @@ # $HEADER$ # -AM_CPPFLAGS = $(sds_portals_utcp_CPPFLAGS) +AM_CPPFLAGS = $(ess_portals_utcp_CPPFLAGS) sources = \ - sds_portals_utcp.h \ - sds_portals_utcp_component.c \ - sds_portals_utcp_module.c + ess_portals_utcp.h \ + ess_portals_utcp_component.c \ + ess_portals_utcp_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds).
-if OMPI_BUILD_sds_portals_utcp_DSO +if OMPI_BUILD_ess_portals_utcp_DSO component_noinst = -component_install = mca_sds_portals_utcp.la +component_install = mca_ess_portals_utcp.la else -component_noinst = libmca_sds_portals_utcp.la +component_noinst = libmca_ess_portals_utcp.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sds_portals_utcp_la_SOURCES = $(sources) -mca_sds_portals_utcp_la_LDFLAGS = -module -avoid-version $(sds_portals_utcp_LDFLAGS) -mca_sds_portals_utcp_la_LIBADD = $(sds_portals_utcp_LIBS) +mca_ess_portals_utcp_la_SOURCES = $(sources) +mca_ess_portals_utcp_la_LDFLAGS = -module -avoid-version $(ess_portals_utcp_LDFLAGS) +mca_ess_portals_utcp_la_LIBADD = $(ess_portals_utcp_LIBS) noinst_LTLIBRARIES = $(component_noinst) -libmca_sds_portals_utcp_la_SOURCES =$(sources) -libmca_sds_portals_utcp_la_LDFLAGS = -module -avoid-version $(sds_portals_utcp_LDFLAGS) -libmca_sds_portals_utcp_la_LIBADD = $(sds_portals_utcp_LIBS) +libmca_ess_portals_utcp_la_SOURCES =$(sources) +libmca_ess_portals_utcp_la_LDFLAGS = -module -avoid-version $(ess_portals_utcp_LDFLAGS) +libmca_ess_portals_utcp_la_LIBADD = $(ess_portals_utcp_LIBS) diff --git a/orte/mca/sds/portals_utcp/configure.m4 b/orte/mca/ess/portals_utcp/configure.m4 similarity index 70% rename from orte/mca/sds/portals_utcp/configure.m4 rename to orte/mca/ess/portals_utcp/configure.m4 index f1bfa1f8f0..585dcc9053 100644 --- a/orte/mca/sds/portals_utcp/configure.m4 +++ b/orte/mca/ess/portals_utcp/configure.m4 @@ -18,15 +18,15 @@ # -# MCA_sds_portals_utcp_CONFIG(action-if-can-compile, +# MCA_ess_portals_utcp_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ -AC_DEFUN([MCA_sds_portals_utcp_CONFIG],[ +AC_DEFUN([MCA_ess_portals_utcp_CONFIG],[ # save compiler flags so that we don't alter them for later # components. 
- sds_portals_utcp_save_CPPFLAGS="$CPPFLAGS" - sds_portals_utcp_save_LDFLAGS="$LDFLAGS" - sds_portals_utcp_save_LIBS="$LIBS" + ess_portals_utcp_save_CPPFLAGS="$CPPFLAGS" + ess_portals_utcp_save_LDFLAGS="$LDFLAGS" + ess_portals_utcp_save_LIBS="$LIBS" # allow user a way to say where the Portals installation is AC_ARG_WITH(portals, @@ -35,27 +35,27 @@ AC_DEFUN([MCA_sds_portals_utcp_CONFIG],[ AS_IF([test -n "$with_portals"], [AS_IF([test -d "$with_portals/include"], - [sds_portals_utcp_CPPFLAGS="-I$with_portals/include" - CPPFLAGS="$CPPFLAGS $sds_portals_utcp_CPPFLAGS"], []) + [ess_portals_utcp_CPPFLAGS="-I$with_portals/include" + CPPFLAGS="$CPPFLAGS $ess_portals_utcp_CPPFLAGS"], []) AS_IF([test -d "$with_portals/lib"], - [sds_portals_utcp_LDFLAGS="-L$with_portals/lib" - LDFLAGS="$LDFLAGS $sds_portals_utcp_LDFLAGS"], [])]) + [ess_portals_utcp_LDFLAGS="-L$with_portals/lib" + LDFLAGS="$LDFLAGS $ess_portals_utcp_LDFLAGS"], [])]) # Try to find all the portals libraries (this is not fun!) AC_ARG_WITH(portals-libs, AC_HELP_STRING([--with-portals-libs=LIBS], [Libraries to link with for portals])) if test -n "$with_portals_libs" ; then - sds_portals_utcp_LIBS="" + ess_portals_utcp_LIBS="" for lib in $with_portals_libs ; do - sds_portals_utcp_LIBS="$sds_portals_utcp_LIBS -l$lib" + ess_portals_utcp_LIBS="$ess_portals_utcp_LIBS -l$lib" done fi - sds_portals_utcp_LIBS="-lp3utcp -lp3api -lp3lib -lp3rt -lp3utcp" + ess_portals_utcp_LIBS="-lp3utcp -lp3api -lp3lib -lp3rt -lp3utcp" # check for portals - LIBS="$LIBS $sds_portals_utcp_LIBS" + LIBS="$LIBS $ess_portals_utcp_LIBS" AC_MSG_CHECKING([for PtlGetRank]) AC_LINK_IFELSE([AC_LANG_PROGRAM([#include #include @@ -76,17 +76,17 @@ PtlGetRank(PTL_INVALID_HANDLE, &rank, &nprocs);])], # we don't actually need the Portals code for this component, # so don't link against them... 
- sds_portals_utcp_CPPFLAGS= - sds_portals_utcp_LDFLAGS= - sds_portals_utcp_LIBS= + ess_portals_utcp_CPPFLAGS= + ess_portals_utcp_LDFLAGS= + ess_portals_utcp_LIBS= # substitute in the things needed to build Portals - AC_SUBST([sds_portals_utcp_CPPFLAGS]) - AC_SUBST([sds_portals_utcp_LDFLAGS]) - AC_SUBST([sds_portals_utcp_LIBS]) + AC_SUBST([ess_portals_utcp_CPPFLAGS]) + AC_SUBST([ess_portals_utcp_LDFLAGS]) + AC_SUBST([ess_portals_utcp_LIBS]) # reset the flags for the next test - CPPFLAGS="$sds_portals_utcp_save_CPPFLAGS" - LDFLAGS="$sds_portals_utcp_save_LDFLAGS" - LIBS="$sds_portals_utcp_save_LIBS" + CPPFLAGS="$ess_portals_utcp_save_CPPFLAGS" + LDFLAGS="$ess_portals_utcp_save_LDFLAGS" + LIBS="$ess_portals_utcp_save_LIBS" ])dnl diff --git a/orte/mca/gpr/null/configure.params b/orte/mca/ess/portals_utcp/configure.params similarity index 88% rename from orte/mca/gpr/null/configure.params rename to orte/mca/ess/portals_utcp/configure.params index feb84f3613..67e1119704 100644 --- a/orte/mca/gpr/null/configure.params +++ b/orte/mca/ess/portals_utcp/configure.params @@ -19,7 +19,9 @@ # $HEADER$ # -# Specific to this module - PARAM_CONFIG_FILES="Makefile" -PARAM_CONFIG_PRIORITY=0 +# +# Set the config priority so that, if we can build, +# only the portals component will build + +PARAM_CONFIG_PRIORITY=50 diff --git a/orte/mca/rds/base/rds_base_no_ops.c b/orte/mca/ess/portals_utcp/ess_portals_utcp.h similarity index 65% rename from orte/mca/rds/base/rds_base_no_ops.c rename to orte/mca/ess/portals_utcp/ess_portals_utcp.h index 6982dc5793..d2335ec74b 100644 --- a/orte/mca/rds/base/rds_base_no_ops.c +++ b/orte/mca/ess/portals_utcp/ess_portals_utcp.h @@ -5,35 +5,26 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** @file: - */ -/* - * includes - */ -#include "orte_config.h" -#include "orte/orte_constants.h" +#ifndef ORTE_ESS_PORTALS_UTCP_H +#define ORTE_ESS_PORTALS_UTCP_H -#include "opal/class/opal_list.h" +BEGIN_C_DECLS -#include "orte/mca/rds/base/rds_private.h" +int orte_ess_portals_utcp_component_open(void); +int orte_ess_portals_utcp_component_close(void); +orte_ess_base_module_t* orte_ess_portals_utcp_component_init(int *priority); -int orte_rds_base_no_op_query(orte_jobid_t job) -{ - return ORTE_ERR_NOT_SUPPORTED; -} +END_C_DECLS -int orte_rds_base_no_op_store_resource(opal_list_t *resources) -{ - return ORTE_ERR_NOT_SUPPORTED; -} +#endif /* ORTE_ESS_PORTALS_UTCP_H */ diff --git a/orte/mca/sds/portals_utcp/sds_portals_utcp_component.c b/orte/mca/ess/portals_utcp/ess_portals_utcp_component.c similarity index 66% rename from orte/mca/sds/portals_utcp/sds_portals_utcp_component.c rename to orte/mca/ess/portals_utcp/ess_portals_utcp_component.c index e4c7c8f85a..1a369ade6e 100644 --- a/orte/mca/sds/portals_utcp/sds_portals_utcp_component.c +++ b/orte/mca/ess/portals_utcp/ess_portals_utcp_component.c @@ -23,25 +23,26 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/portals_utcp/sds_portals_utcp.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_portals_utcp_module; +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/portals_utcp/ess_portals_utcp.h" + +extern orte_ess_base_module_t orte_ess_portals_utcp_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_portals_utcp_component = { +orte_ess_base_component_t mca_ess_portals_utcp_component 
= { /* First, the mca_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ "portals_utcp", @@ -50,8 +51,8 @@ orte_sds_base_component_t mca_sds_portals_utcp_component = { ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_portals_utcp_component_open, - orte_sds_portals_utcp_component_close + orte_ess_portals_utcp_component_open, + orte_ess_portals_utcp_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -61,41 +62,35 @@ orte_sds_base_component_t mca_sds_portals_utcp_component = { }, /* Initialization / querying functions */ - orte_sds_portals_utcp_component_init + orte_ess_portals_utcp_component_init }; int -orte_sds_portals_utcp_component_open(void) +orte_ess_portals_utcp_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_portals_utcp_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_portals_utcp_component_init(int *priority) { - int id; - char *mode; - - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - mca_base_param_lookup_string(id, &mode); - - /* if mode isn't NULL, then we have an ORTE starter. Don't use - this component */ - if (NULL != mode) { - free(mode); - return NULL; - } + /* since we are not launched by an ORTE launcher, + * we want to be selected ahead of the singleton + * component if we detect our supported environment. 
+ * So ensure that our priority is higher than + * the singleton's + */ if (NULL == getenv("PTL_MY_RID")) return NULL; *priority = 60; - return &orte_sds_portals_utcp_module; + return &orte_ess_portals_utcp_module; } int -orte_sds_portals_utcp_component_close(void) +orte_ess_portals_utcp_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c b/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c new file mode 100644 index 0000000000..a08b7ae833 --- /dev/null +++ b/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "opal/util/output.h" + +#include "orte/mca/errmgr/base/base.h" +#include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/portals_utcp/ess_portals_utcp.h" + +static int rte_init(char flags); +static int rte_finalize(void); +static void rte_abort(int status, bool report) __opal_attribute_noreturn__; + +orte_ess_base_module_t orte_ess_portals_utcp_module = { + rte_init, + rte_finalize, + rte_abort +}; + +static int rte_init(char flags) +{ + int rc, i, len, num_procs; + orte_vpid_t vpid; + char *vpid_string, *jobid_str; + char *nidmap_string; + + vpid_string = getenv("PTL_MY_RID"); + nidmap_string = getenv("PTL_NIDMAP"); + if (NULL == vpid_string || NULL == nidmap_string || + NULL == getenv("PTL_PIDMAP") || NULL == getenv("PTL_IFACE")) { + return ORTE_ERR_NOT_FOUND; + } + + /* Get our process information */ + + /* Procs in this environment are directly launched. Hence, there + * was no mpirun to create a jobid for us, and each app proc is + * going to have to fend for itself. For now, we assume that the + * jobid is some arbitrary number (say, 1). + */ + ORTE_PROC_MY_NAME->jobid = 1; /* not 0, since it has special meaning */ + + /* find our vpid assuming range starts at 0 */ + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, vpid_string))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + ORTE_PROC_MY_NAME->vpid = vpid; + + /* + * Get the number of procs in the job. We assume vpids start at 0. We + * assume that there are procs, since the nidmap is a + * : seperated list of nids, and the utcp reference implementation + * assumes all will be present + */ + len = strlen(nidmap_string); + n