diff --git a/acinclude.m4 b/acinclude.m4 index 5a4b1df370..b7cb7fb1a8 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -67,9 +67,7 @@ m4_include(config/ompi_check_icc.m4) m4_include(config/ompi_check_gm.m4) m4_include(config/ompi_check_mx.m4) m4_include(config/ompi_check_alps.m4) -m4_include(config/ompi_check_bproc.m4) m4_include(config/ompi_check_lsf.m4) -m4_include(config/ompi_check_xcpu.m4) m4_include(config/ompi_check_openib.m4) m4_include(config/ompi_check_portals.m4) m4_include(config/ompi_check_psm.m4) diff --git a/config/ompi_check_bproc.m4 b/config/ompi_check_bproc.m4 deleted file mode 100644 index 32dc951f89..0000000000 --- a/config/ompi_check_bproc.m4 +++ /dev/null @@ -1,67 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# new bproc is LANL versions >= 3.2.0 -# old bproc is all Scyld versions and LANL version < 3.2.0 -# OMPI_CHECK_BPROC(prefix, [action-if-new-bproc], [action-if-old-bproc], -# [action-if-not-found]) -# -------------------------------------------------------- -AC_DEFUN([OMPI_CHECK_BPROC],[ - AC_ARG_WITH([bproc], - [AC_HELP_STRING([--with-bproc], - [Directory where the BProc software is installed])]) - - AS_IF([test ! -z "$with_bproc" -a "$with_bproc" = "no"],[$4], [ - ompi_check_bproc_save_CPPFLAGS="$CPPFLAGS" - ompi_check_bproc_save_LDFLAGS="$LDFLAGS" - ompi_check_bproc_save_LIBS="$LIBS" - - AS_IF([test ! 
-z "$with_bproc" -a "$with_bproc" != "yes"], - [CPPFLAGS="$CPPFLAGS -I$with_bproc/include" - LDFLAGS="$LDFLAGS -L$with_bproc/lib"]) - AC_CHECK_HEADERS([sys/bproc.h], - [AC_CHECK_LIB([bproc], - [bproc_numnodes], - [ompi_check_bproc_happy="yes"], - [ompi_check_bproc_happy="no"])], - [ompi_check_bproc_happy="no"]) - - # Check for Scyld bproc or an old version of LANL Bproc (pre 3.2.0) - AS_IF([test "$ompi_check_bproc_happy" = "yes"], - [AC_CHECK_HEADERS([sys/bproc_common.h],[ompi_check_bproc_happy="new"], - [ompi_check_bproc_happy="old"], - [#include - #include ])]) - - CPPFLAGS="$ompi_check_bproc_save_CPPFLAGS" - LDFLAGS="$ompi_check_bproc_save_LDFLAGS" - LIBS="$ompi_check_bproc_save_LIBS" - - AS_IF([test "$ompi_check_bproc_happy" != "no"], - [AS_IF([test ! -z "$with_bproc" -a "$with_bproc" != "yes"], - [$1_CPPFLAGS="$$1_CPPFLAGS -I$with_bproc/include" - $1_LDFLAGS="$$1_LDFLAGS -L$with_bproc/lib"]) - $1_LIBS="$$1_LIBS -lbproc" - AS_IF([test "$ompi_check_bproc_happy" = "new"], [$2], [$3])], - [AS_IF([test ! -z "$with_bproc"], - [AC_MSG_ERROR([BProc support request but not found. Perhaps -you need to specify the location of the BProc libraries.])]) - $4]) - ]) -]) diff --git a/config/ompi_check_xcpu.m4 b/config/ompi_check_xcpu.m4 deleted file mode 100644 index 0c58f2b97e..0000000000 --- a/config/ompi_check_xcpu.m4 +++ /dev/null @@ -1,63 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -# OMPI_CHECK_XCPU(prefix, [action-if-found], [action-if-not-found]) -# -------------------------------------------------------- -AC_DEFUN([OMPI_CHECK_XCPU],[ - AC_ARG_WITH([xcpu], - [AC_HELP_STRING([--with-xcpu], - [=yes will Build XCPU launcher component (default: no)])]) - - AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" = "no"],[$3], [ - ompi_check_xcpu_save_CPPFLAGS="$CPPFLAGS" - ompi_check_xcpu_save_LDFLAGS="$LDFLAGS" - ompi_check_xcpu_save_LIBS="$LIBS" - - AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" != "yes"], - [CPPFLAGS="$CPPFLAGS -I$with_xcpu/include" - LDFLAGS="$LDFLAGS -L$with_xcpu/lib"]) - - AC_CHECK_HEADERS([libxcpu.h], - [AC_CHECK_LIB([xcpu], - [xp_command_create], - [ompi_check_xcpu_happy="yes"], - [ompi_check_xcpu_happy="no"], - [-lstrutil -lspclient -lspfs -lelf])], - [ompi_check_xcpu_happy="no"], - [#include - #include - #include - #include ]) - - CPPFLAGS="$ompi_check_xcpu_save_CPPFLAGS" - LDFLAGS="$ompi_check_xcpu_save_LDFLAGS" - LIBS="$ompi_check_xcpu_save_LIBS" - - AS_IF([test "$ompi_check_xcpu_happy" != "no"], - [AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" != "yes"], - [$1_CPPFLAGS="$$1_CPPFLAGS -I$with_xcpu/include" - $1_LDFLAGS="$$1_LDFLAGS -L$with_xcpu/lib"]) - $1_LIBS="$$1_LIBS -lxcpu -lstrutil -lspclient -lspfs -lelf" $2], - [AS_IF([test ! -z "$with_xcpu"], - [AC_MSG_ERROR([Xcpu support request but not found. 
Perhaps -you need to specify the location of the Xcpu libraries.])]) - $3]) - ]) -]) diff --git a/config/ompi_configure_options.m4 b/config/ompi_configure_options.m4 index d7733865e2..8e5ddbb0de 100644 --- a/config/ompi_configure_options.m4 +++ b/config/ompi_configure_options.m4 @@ -546,6 +546,25 @@ AC_DEFINE_UNQUOTED([ORTE_ENABLE_JUMBO_APPS], [$orte_want_jumbo_apps], [Enable support for applications in excess of 32K processes and/or 32K jobs, or running on clusters in excess of 32k nodes]) +# +# Minimal RTE support +# + +AC_MSG_CHECKING([if want full RTE support]) +AC_ARG_ENABLE([rte], + [AC_HELP_STRING([--disable-rte-support], + [Disable RTE support for systems that do not require it (default: full RTE support enabled)])]) +if test "$enable_rte_support" = "no"; then + AC_MSG_RESULT([no]) + orte_disable_full_support=1 +else + AC_MSG_RESULT([yes]) + orte_disable_full_support=0 +fi +AC_DEFINE_UNQUOTED([ORTE_DISABLE_FULL_SUPPORT], [$orte_disable_full_support], + [Enable full RTE support]) +AM_CONDITIONAL(ORTE_DISABLE_FULL_SUPPORT, test "$enable_rte_support" = "no") + # # Cross-compile data # diff --git a/config/ompi_mca.m4 b/config/ompi_mca.m4 index 56a8bcf085..1945e9df4c 100644 --- a/config/ompi_mca.m4 +++ b/config/ompi_mca.m4 @@ -50,7 +50,7 @@ AC_DEFUN([OMPI_MCA],[ AC_ARG_ENABLE([mca-no-build], [AC_HELP_STRING([--enable-mca-no-build=LIST], [Comma-separated list of <type>-<component> pairs - that will not be built. Example: "--enable-mca-no-build=maffinity-libnuma,btl-portals" will disable building both the "libnuma" maffinity and "portals" btl components.])]) + that will not be built.
Example: "--enable-mca-no-build=maffinity,btl-portals" will disable building all maffinity components and the "portals" btl components.])]) AC_ARG_ENABLE(mca-dso, AC_HELP_STRING([--enable-mca-dso=LIST], [Comma-separated list of types and/or @@ -88,9 +88,13 @@ AC_DEFUN([OMPI_MCA],[ for item in $enable_mca_no_build; do type="`echo $item | cut -s -f1 -d-`" comp="`echo $item | cut -s -f2- -d-`" - if test -z $type -o -z $comp ; then - AC_MSG_ERROR([*** The enable-no-build flag requires a -*** list of type-component pairs. Invalid input detected.]) + if test -z $type ; then + type=$item + fi + if test -z $comp ; then + str="`echo DISABLE_${type}=1 | sed s/-/_/g`" + eval $str + msg="$item $msg" else str="`echo DISABLE_${type}_${comp}=1 | sed s/-/_/g`" eval $str @@ -359,9 +363,13 @@ AC_DEFUN([MCA_CONFIGURE_FRAMEWORK],[ # abort with a reasonable message. m4_ifdef([mca_$2_no_config_component_list], [], [m4_fatal([Could not find mca_$2_no_config_component_list - rerun autogen.sh without -l])]) + # make sure priority stuff set right m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST], [m4_ifval(mca_$2_no_config_component_list, [m4_fatal([Framework $2 using STOP_AT_FIRST but at least one component has no configure.m4])])]) + m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST_PRIORITY], + [m4_ifval(mca_$2_no_config_component_list, + [m4_fatal([Framework $2 using STOP_AT_FIRST but at least one component has no configure.m4])])]) m4_foreach(mca_component, [mca_$2_no_config_component_list], [m4_ifval(mca_component, [MCA_CONFIGURE_NO_CONFIG_COMPONENT($1, $2, mca_component, @@ -404,7 +412,7 @@ AC_DEFUN([MCA_CONFIGURE_FRAMEWORK],[ # It would be really hard to run these for "find first that # works", so we don't :) m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST], [], - [m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST], [], + [m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), 
[STOP_AT_FIRST_PRIORITY], [], [AS_IF([test "$3" != "0"], [MCA_CONFIGURE_ALL_CONFIG_COMPONENTS($1, $2, [all_components], [static_components], [dso_components], @@ -876,6 +884,11 @@ AC_DEFUN([MCA_COMPONENT_BUILD_CHECK],[ fi # if we were explicitly disabled, don't build :) + str="DISABLED_COMPONENT_CHECK=\$DISABLE_${framework}" + eval $str + if test "$DISABLED_COMPONENT_CHECK" = "1" ; then + want_component=0 + fi str="DISABLED_COMPONENT_CHECK=\$DISABLE_${framework}_$component" eval $str if test "$DISABLED_COMPONENT_CHECK" = "1" ; then diff --git a/configure.ac b/configure.ac index ae5a39db1c..ceb8bb8a41 100644 --- a/configure.ac +++ b/configure.ac @@ -1263,11 +1263,7 @@ AC_CONFIG_FILES([ orte/include/Makefile orte/etc/Makefile - orte/tools/orteboot/Makefile orte/tools/orted/Makefile - orte/tools/ortehalt/Makefile - orte/tools/ortekill/Makefile - orte/tools/orteprobe/Makefile orte/tools/orterun/Makefile orte/tools/wrappers/Makefile orte/tools/wrappers/ortecc-wrapper-data.txt @@ -1304,6 +1300,7 @@ AC_CONFIG_FILES([ ompi/tools/wrappers/mpif77-wrapper-data.txt ompi/tools/wrappers/mpif90-wrapper-data.txt ompi/tools/ortetools/Makefile + ompi/tools/ompi-server/Makefile test/Makefile test/event/Makefile diff --git a/contrib/platform/cray_xt3_romio b/contrib/platform/cray_xt3_romio index 63a4551495..ce36fb529c 100755 --- a/contrib/platform/cray_xt3_romio +++ b/contrib/platform/cray_xt3_romio @@ -6,7 +6,8 @@ enable_pretty_print_stacktrace=no enable_dlopen=no with_portals_config=redstorm with_memory_manager=none -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,gpr-proxy,gpr-replica,iof-svc,ns-proxy,oob-tcp,pls-rsh,ras-dash_host,ras-hostfile,ras-localhost,rds-hostfile,rds-resfile,rmaps-round_robin,rmgr-proxy,rmgr-urm,rml-oob,sds-env,sds-seed,sds-singleton,btl-sm,btl-self,btl-tcp,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,filem-rsh,grpcomm-basic 
+enable_mca_no_build=carto-file,maffinity,paffinity,timer,errmgr,iof,odls,oob,plm,ras,rmaps,rml,routed,filem,btl-sm,btl-self,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,libnbc,vt +with_rte_support=no enable_heterogeneous=no enable_pty_support=no enable_mem_debug=no diff --git a/contrib/platform/cray_xt_cnl_romio b/contrib/platform/cray_xt_cnl_romio index 1cb85bacd6..10dd9c6580 100755 --- a/contrib/platform/cray_xt_cnl_romio +++ b/contrib/platform/cray_xt_cnl_romio @@ -1,3 +1,7 @@ +enable_mem_debug=no +enable_mem_profile=no +enable_debug=no +enable_debug_symbols=no enable_io_romio=yes enable_static=yes enable_shared=no @@ -6,12 +10,9 @@ enable_pretty_print_stacktrace=no enable_dlopen=no with_portals_config=cnl_modex with_memory_manager=none -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,pls-rsh,pml-dr,filem-rsh,grpcomm-cnos,pls-cnos,rmgr-cnos,rml-cnos,routed-cnos,sds-portals_utcp,sds-cnos +enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,ess-cnos,pml-dr,filem-rsh,grpcomm-cnos,rmgr-cnos,rml-cnos enable_heterogeneous=no enable_pty_support=no -enable_mem_debug=no -enable_mem_profile=no -enable_debug_symbols=no enable_binaries=yes ompi_cv_f77_sizeof_LOGICAL=${ompi_cv_f77_sizeof_LOGICAL=4} diff --git a/contrib/platform/lanl/roadrunner/debug b/contrib/platform/lanl/roadrunner/debug new file mode 100644 index 0000000000..134ddd3774 --- /dev/null +++ b/contrib/platform/lanl/roadrunner/debug @@ -0,0 +1,20 @@ +with_threads=no +enable_dlopen=no +enable_pty_support=no +with_tm=/opt/PBS +with_wrapper_cflags=-I/opt/panfs/include +LDFLAGS=-L/opt/PBS/lib64 +with_openib=/opt/ofed +with_io_romio_flags=--with-file-system=ufs+nfs+panfs +with_memory_manager=no +enable_mem_debug=yes +enable_mem_profile=no +enable_debug_symbols=yes +enable_binaries=yes +with_devel_headers=yes +enable_heterogeneous=yes +enable_picky=yes +enable_debug=yes +enable_shared=yes +enable_static=no +with_slurm=no diff --git 
a/contrib/platform/lanl/roadrunner/openmpi-mca-params.conf b/contrib/platform/lanl/roadrunner/openmpi-mca-params.conf new file mode 100644 index 0000000000..b4da08167e --- /dev/null +++ b/contrib/platform/lanl/roadrunner/openmpi-mca-params.conf @@ -0,0 +1,64 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This is the default system-wide MCA parameters defaults file. +# Specifically, the MCA parameter "mca_param_files" defaults to a +# value of +# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" +# (this file is the latter of the two). So if the default value of +# mca_param_files is not changed, this file is used to set system-wide +# MCA parameters. This file can therefore be used to set system-wide +# default MCA parameters for all users. Of course, users can override +# these values if they want, but this file is an excellent location +# for setting system-specific MCA parameters for those users who don't +# know / care enough to investigate the proper values for them. + +# Note that this file is only applicable where it is visible (in a +# filesystem sense). Specifically, MPI processes each read this file +# during their startup to determine what default values for MCA +# parameters should be used. 
mpirun does not bundle up the values in +# this file from the node where it was run and send them to all nodes; +# the default value decisions are effectively distributed. Hence, +# these values are only applicable on nodes that "see" this file. If +# $sysconf is a directory on a local disk, it is likely that changes +# to this file will need to be propagated to other nodes. If $sysconf +# is a directory that is shared via a networked filesystem, changes to +# this file will be visible to all nodes that share this $sysconf. + +# The format is straightforward: one per line, mca_param_name = +# rvalue. Quoting is ignored (so if you use quotes or escape +# characters, they'll be included as part of the value). For example: + +# Disable run-time MPI parameter checking +# mpi_param_check = 0 + +# Note that the value "~/" will be expanded to the current user's home +# directory. For example: + +# Change component loading path +# component_path = /usr/local/lib/openmpi:~/my_openmpi_components + +# See "ompi_info --param all all" for a full listing of Open MPI MCA +# parameters available and their default values. 
+ +oob_tcp_if_include = ib0 +mpi_preconnect_oob = 1 +btl_sm_free_list_max = 768 +oob_tcp_connect_timeout = 600 +oob_tcp_if_include = ib0 diff --git a/contrib/platform/lanl/roadrunner/optimized b/contrib/platform/lanl/roadrunner/optimized new file mode 100644 index 0000000000..cbedace4af --- /dev/null +++ b/contrib/platform/lanl/roadrunner/optimized @@ -0,0 +1,18 @@ +with_threads=no +enable_dlopen=no +enable_pty_support=no +with_tm=/opt/PBS +LDFLAGS=-L/opt/PBS/lib64 +with_openib=/opt/ofed +with_memory_manager=yes +enable_mem_debug=no +enable_mem_profile=no +enable_debug_symbols=no +enable_binaries=yes +with_devel_headers=no +enable_heterogeneous=yes +enable_debug=no +enable_shared=yes +with_wrapper_cflags=-I/opt/panfs/include +with_io_romio_flags=--with-file-system=ufs+nfs+panfs +with_slurm=no diff --git a/contrib/platform/portals-ref-rte b/contrib/platform/portals-ref-rte index 4a5af66056..8b60362bc4 100644 --- a/contrib/platform/portals-ref-rte +++ b/contrib/platform/portals-ref-rte @@ -10,13 +10,9 @@ enable_dlopen=no with_portals_config=utcp with_memory_manager=no enable_heterogeneous=no -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,gpr-proxy,gpr-replica,rml-oob,btl-sm,mpool-sm,btl-self,ns-proxy,rds-resfile,rds-hostfile,sds-env,sds-pipe,sds-seed,sds-singleton,coll-hierarch,coll-sm,pml-dr,btl-tcp,oob-tcp,ras-dash_host,ras-hostfile,ras-localhost,rmaps-round_robin,rmgr-urm,rmgr-proxy,pls-fork,pls-rsh,common-sm,iof-svc -with_slurm=no -with_bproc=no -with_mvapi=no +enable_mca_no_build=maffinity,paffinity,timer,errmgr,iof,odls,oob,plm,ras,rmaps,rml,routed,btl-sm,mpool-sm,btl-self,coll-hierarch,coll-sm,pml-dr,btl-tcp,common-sm +with_rte_support=no with_openib=no with_gm=no with_mx=no -with_rml_cnos=utcp -with_rmgr_cnos=utcp enable_binaries=no diff --git a/contrib/platform/ps3 b/contrib/platform/ps3 index 8cb8085af3..80dd0ebf6d 100644 --- a/contrib/platform/ps3 +++ b/contrib/platform/ps3 @@ -5,7 +5,7 @@ with_threads=no 
enable_pretty_print_stacktrace=no enable_dlopen=no with_memory_manager=none -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,allocator-basic,rcache-vma,pls-gridengine,pls-slurm,ras-slurm,ras-gridengine,btl-sm,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,pml-cm,mpool-rdma,osc-rdma,sds-slurm,backtrace-darwin,memory-darwin,memory-malloc_hook,memory_ptmalloc2,paffinity-solaris,paffinity-windows,timer-aix,timer-altix,timer-darwin,timer-solaris,timer-windows +enable_mca_no_build=maffinity,paffinity,timer,allocator-basic,rcache-vma,plm-gridengine,plm-slurm,ras-slurm,ras-gridengine,btl-sm,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,pml-cm,mpool-rdma,osc-rdma,ess-slurm,backtrace-darwin,memory-darwin,memory-malloc_hook,memory_ptmalloc2 enable_heterogeneous=no enable_pty_support=no enable_mem_debug=no diff --git a/contrib/platform/redstorm b/contrib/platform/redstorm index 4d3937bf92..dff54a7d39 100644 --- a/contrib/platform/redstorm +++ b/contrib/platform/redstorm @@ -6,7 +6,8 @@ enable_pretty_print_stacktrace=no enable_dlopen=no with_portals_config=redstorm with_memory_manager=none -enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,gpr-proxy,gpr-replica,iof-svc,ns-proxy,oob-tcp,pls-rsh,ras-dash_host,ras-hostfile,ras-localhost,rds-hostfile,rds-resfile,rmaps-round_robin,rmgr-proxy,rmgr-urm,rml-oob,sds-env,sds-seed,sds-singleton,btl-sm,btl-self,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,filem-rsh,grpcomm-basic +enable_mca_no_build=carto-file,maffinity,paffinity,timer,errmgr,iof,odls,oob,plm,ras,rmaps,rml,routed,filem,btl-sm,btl-self,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,libnbc,vt +with_rte_support=no enable_heterogeneous=no enable_pty_support=no enable_mem_debug=no diff --git a/ompi/communicator/Makefile.am b/ompi/communicator/Makefile.am index 671630f62a..7eb0ddb8d3 100644 --- a/ompi/communicator/Makefile.am +++ b/ompi/communicator/Makefile.am @@ -25,6 +25,4 @@ headers += \ 
libmpi_la_SOURCES += \ communicator/comm_init.c \ communicator/comm.c \ - communicator/comm_cid.c \ - communicator/comm_dyn.c \ - communicator/comm_publish.c + communicator/comm_cid.c diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 7205a4a412..ab37e8d86c 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -23,7 +23,10 @@ #include #include "ompi/constants.h" -#include "orte/dss/dss.h" + +#include "opal/dss/dss.h" +#include "orte/util/name_fns.h" + #include "ompi/proc/proc.h" #include "opal/threads/mutex.h" #include "opal/util/bit_ops.h" @@ -31,7 +34,7 @@ #include "opal/util/convert.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" -#include "orte/mca/ns/ns.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" @@ -139,7 +142,7 @@ int ompi_comm_set ( ompi_communicator_t **ncomm, /* Check how many different jobids are represented in this communicator. Necessary for the disconnect of dynamic communicators. 
*/ - ompi_comm_mark_dyncomm (newcomm); + ompi_dpm.mark_dyncomm (newcomm); /* Set error handler */ newcomm->error_handler = errh; @@ -1035,7 +1038,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, ompi_proc_t **rprocs=NULL; orte_std_cntr_t size_len; int int_len, rlen; - orte_buffer_t *sbuf=NULL, *rbuf=NULL; + opal_buffer_t *sbuf=NULL, *rbuf=NULL; void *sendbuf; char *recvbuf; ompi_proc_t **proc_list=NULL; @@ -1045,7 +1048,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, local_size = ompi_comm_size (local_comm); if (local_rank == local_leader) { - sbuf = OBJ_NEW(orte_buffer_t); + sbuf = OBJ_NEW(opal_buffer_t); if (NULL == sbuf) { rc = ORTE_ERROR; goto err_exit; @@ -1065,7 +1068,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, if ( OMPI_SUCCESS != rc ) { goto err_exit; } - if (ORTE_SUCCESS != (rc = orte_dss.unload(sbuf, &sendbuf, &size_len))) { + if (ORTE_SUCCESS != (rc = opal_dss.unload(sbuf, &sendbuf, &size_len))) { goto err_exit; } @@ -1131,13 +1134,13 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, goto err_exit; } - rbuf = OBJ_NEW(orte_buffer_t); + rbuf = OBJ_NEW(opal_buffer_t); if (NULL == rbuf) { rc = ORTE_ERROR; goto err_exit; } - if (ORTE_SUCCESS != (rc = orte_dss.load(rbuf, recvbuf, rlen))) { + if (ORTE_SUCCESS != (rc = opal_dss.load(rbuf, recvbuf, rlen))) { goto err_exit; } @@ -1250,7 +1253,7 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high ) theirproc = ompi_group_peer_lookup(intercomm->c_remote_group,0); mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; - rc = orte_ns.compare_fields(mask, &(ourproc->proc_name), &(theirproc->proc_name)); + rc = orte_util_compare_name_fields(mask, &(ourproc->proc_name), &(theirproc->proc_name)); if ( 0 > rc ) { flag = true; } @@ -1611,7 +1614,7 @@ static int ompi_comm_fill_rest (ompi_communicator_t *comm, /* verify whether to set the flag, that this comm contains process from more than one jobid. 
*/ - ompi_comm_mark_dyncomm (comm); + ompi_dpm.mark_dyncomm (comm); /* set the error handler */ comm->error_handler = errh; diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index 39348f5640..4c46d19b47 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -22,9 +22,9 @@ #include "ompi_config.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "opal/util/convert.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" #include "ompi/communicator/communicator.h" #include "ompi/proc/proc.h" #include "ompi/constants.h" @@ -35,10 +35,10 @@ #include "orte/mca/rml/rml.h" #include "ompi/request/request.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/mca/dpm/dpm.h" + +BEGIN_C_DECLS -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif /** * These functions make sure, that we determine the global result over * an intra communicators (simple), an inter-communicator and a @@ -774,26 +774,26 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf, } if (local_rank == local_leader ) { - orte_buffer_t *sbuf; - orte_buffer_t *rbuf; + opal_buffer_t *sbuf; + opal_buffer_t *rbuf; - sbuf = OBJ_NEW(orte_buffer_t); - rbuf = OBJ_NEW(orte_buffer_t); + sbuf = OBJ_NEW(opal_buffer_t); + rbuf = OBJ_NEW(opal_buffer_t); - if (ORTE_SUCCESS != (rc = orte_dss.pack(sbuf, tmpbuf, (orte_std_cntr_t)count, ORTE_INT))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(sbuf, tmpbuf, (orte_std_cntr_t)count, OPAL_INT))) { goto exit; } if ( send_first ) { - rc = orte_rml.send_buffer(remote_leader, sbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0); - rc = orte_rml.recv_buffer(remote_leader, rbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0); + rc = orte_rml.send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0); + rc = orte_rml.recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0); } else { - rc = orte_rml.recv_buffer(remote_leader, rbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0); - rc = 
orte_rml.send_buffer(remote_leader, sbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0); + rc = orte_rml.recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0); + rc = orte_rml.send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0); } - if (ORTE_SUCCESS != (rc = orte_dss.unpack(rbuf, outbuf, &size_count, ORTE_INT))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(rbuf, outbuf, &size_count, OPAL_INT))) { goto exit; } OBJ_RELEASE(sbuf); @@ -834,6 +834,5 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf, return (rc); } -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif + +END_C_DECLS diff --git a/ompi/communicator/comm_dyn.c b/ompi/communicator/comm_dyn.c deleted file mode 100644 index c766f2c8b3..0000000000 --- a/ompi/communicator/comm_dyn.c +++ /dev/null @@ -1,1069 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include -#ifdef HAVE_SYS_UIO_H -#include -#endif -#ifdef HAVE_NET_UIO_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ - -#include "opal/util/opal_environ.h" -#include "opal/util/printf.h" -#include "opal/util/convert.h" -#include "opal/threads/mutex.h" -#include "opal/util/bit_ops.h" -#include "opal/util/argv.h" - -#include "ompi/communicator/communicator.h" -#include "ompi/request/request.h" -#include "ompi/errhandler/errhandler.h" -#include "ompi/proc/proc.h" -#include "ompi/info/info.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/runtime/ompi_module_exchange.h" - -#include "orte/util/proc_info.h" -#include "orte/dss/dss.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ras/ras_types.h" -#include "orte/mca/rmaps/rmaps_types.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/smr/smr_types.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/grpcomm/grpcomm.h" - -#include "orte/runtime/runtime.h" - -static int ompi_comm_get_rport (orte_process_name_t *port, - int send_first, struct ompi_proc_t *proc, - orte_rml_tag_t tag, orte_process_name_t *rport); - - -int ompi_comm_connect_accept ( ompi_communicator_t *comm, int root, - orte_process_name_t *port, int send_first, - ompi_communicator_t **newcomm, orte_rml_tag_t tag ) -{ - int size, rsize, rank, rc; - orte_std_cntr_t num_vals; - orte_std_cntr_t rnamebuflen = 0; - int rnamebuflen_int = 0; - void *rnamebuf=NULL; - - ompi_communicator_t *newcomp=MPI_COMM_NULL; - ompi_proc_t **rprocs=NULL; - ompi_group_t *group=comm->c_local_group; - orte_process_name_t *rport=NULL, tmp_port_name; - orte_buffer_t *nbuf=NULL, *nrbuf=NULL; - ompi_proc_t **proc_list=NULL, **new_proc_list; - int i,j, 
new_proc_len; - ompi_group_t *new_group_pointer; - - size = ompi_comm_size ( comm ); - rank = ompi_comm_rank ( comm ); - - /* tell the progress engine to tick the event library more - often, to make sure that the OOB messages get sent */ - opal_progress_event_users_increment(); - - if ( rank == root ) { - /* The process receiving first does not have yet the contact - information of the remote process. Therefore, we have to - exchange that. - */ - - if(!OMPI_GROUP_IS_DENSE(group)) { - proc_list = (ompi_proc_t **) calloc (group->grp_proc_count, - sizeof (ompi_proc_t *)); - for(i=0 ; igrp_proc_count ; i++) - proc_list[i] = ompi_group_peer_lookup(group,i); - } - - if ( OMPI_COMM_JOIN_TAG != (int)tag ) { - if(OMPI_GROUP_IS_DENSE(group)){ - rc = ompi_comm_get_rport(port,send_first, - group->grp_proc_pointers[rank], tag, - &tmp_port_name); - } - else { - rc = ompi_comm_get_rport(port,send_first, - proc_list[rank], tag, - &tmp_port_name); - } - if (OMPI_SUCCESS != rc) { - return rc; - } - rport = &tmp_port_name; - } else { - rport = port; - } - - /* Generate the message buffer containing the number of processes and the list of - participating processes */ - nbuf = OBJ_NEW(orte_buffer_t); - if (NULL == nbuf) { - return OMPI_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(nbuf, &size, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - if(OMPI_GROUP_IS_DENSE(group)) { - ompi_proc_pack(group->grp_proc_pointers, size, nbuf); - } - else { - ompi_proc_pack(proc_list, size, nbuf); - } - - nrbuf = OBJ_NEW(orte_buffer_t); - if (NULL == nrbuf ) { - rc = OMPI_ERROR; - goto exit; - } - - /* Exchange the number and the list of processes in the groups */ - if ( send_first ) { - rc = orte_rml.send_buffer(rport, nbuf, tag, 0); - rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); - } else { - rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); - rc = orte_rml.send_buffer(rport, nbuf, tag, 0); - } - - if (ORTE_SUCCESS != (rc = orte_dss.unload(nrbuf, &rnamebuf, &rnamebuflen))) { - 
ORTE_ERROR_LOG(rc); - goto exit; - } - } - - /* First convert the size_t to an int so we can cast in the bcast to a void * - * if we don't then we will get badness when using big vs little endian - * THIS IS NO LONGER REQUIRED AS THE LENGTH IS NOW A STD_CNTR_T, WHICH - * CORRELATES TO AN INT32 - */ - rnamebuflen_int = (int)rnamebuflen; - - /* bcast the buffer-length to all processes in the local comm */ - rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm, - comm->c_coll.coll_bcast_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - rnamebuflen = rnamebuflen_int; - - if ( rank != root ) { - /* non root processes need to allocate the buffer manually */ - rnamebuf = (char *) malloc(rnamebuflen); - if ( NULL == rnamebuf ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - /* bcast list of processes to all procs in local group - and reconstruct the data. Note that proc_get_proclist - adds processes, which were not known yet to our - process pool. - */ - rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm, - comm->c_coll.coll_bcast_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - nrbuf = OBJ_NEW(orte_buffer_t); - if (NULL == nrbuf) { - goto exit; - } - if ( ORTE_SUCCESS != ( rc = orte_dss.load(nrbuf, rnamebuf, rnamebuflen))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - num_vals = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(nrbuf, &rsize, &num_vals, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - rc = ompi_proc_unpack(nrbuf, rsize, &rprocs, &new_proc_len, &new_proc_list); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - /* If we added new procs, we need to do the modex and then call - PML add_procs */ - if (new_proc_len > 0) { - opal_list_t all_procs; - orte_namelist_t *name; - orte_buffer_t mdx_buf, rbuf; - - OBJ_CONSTRUCT(&all_procs, opal_list_t); - - if (send_first) { - for (i = 0 ; i < group->grp_proc_count ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = 
&(ompi_group_peer_lookup(group, i)->proc_name); - opal_list_append(&all_procs, &name->item); - } - - for (i = 0 ; i < rsize ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = &(rprocs[i]->proc_name); - opal_list_append(&all_procs, &name->item); - } - } else { - for (i = 0 ; i < rsize ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = &(rprocs[i]->proc_name); - opal_list_append(&all_procs, &name->item); - } - - for (i = 0 ; i < group->grp_proc_count ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = &(ompi_group_peer_lookup(group, i)->proc_name); - opal_list_append(&all_procs, &name->item); - } - } - - OBJ_CONSTRUCT(&mdx_buf, orte_buffer_t); - if (OMPI_SUCCESS != (rc = ompi_modex_get_my_buffer(&mdx_buf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - OBJ_CONSTRUCT(&rbuf, orte_buffer_t); - if (OMPI_SUCCESS != (rc = orte_grpcomm.allgather_list(&all_procs, - &mdx_buf, - &rbuf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - OBJ_DESTRUCT(&mdx_buf); - - if (OMPI_SUCCESS != (rc = ompi_modex_process_data(&rbuf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - OBJ_DESTRUCT(&rbuf); - - /* - while (NULL != (item = opal_list_remove_first(&all_procs))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&all_procs); - */ - - MCA_PML_CALL(add_procs(new_proc_list, new_proc_len)); - } - - OBJ_RELEASE(nrbuf); - if ( rank == root ) { - OBJ_RELEASE(nbuf); - } - - new_group_pointer=ompi_group_allocate(rsize); - if( NULL == new_group_pointer ) { - return MPI_ERR_GROUP; - } - - /* put group elements in the list */ - for (j = 0; j < rsize; j++) { - new_group_pointer->grp_proc_pointers[j] = rprocs[j]; - } /* end proc loop */ - - /* increment proc reference counters */ - ompi_group_increment_proc_count(new_group_pointer); - - /* set up communicator structure */ - rc = ompi_comm_set ( &newcomp, /* new comm */ - comm, /* old comm */ - group->grp_proc_count, /* local_size */ - NULL, /* local_procs */ - rsize, /* remote_size */ - NULL , /* remote_procs */ - NULL, /* attrs */ - 
comm->error_handler, /* error handler */ - NULL, /* topo component */ - group, /* local group */ - new_group_pointer /* remote group */ - ); - if ( NULL == newcomp ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - ompi_group_decrement_proc_count (new_group_pointer); - OBJ_RELEASE(new_group_pointer); - new_group_pointer = MPI_GROUP_NULL; - - /* allocate comm_cid */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - rport, /* remote leader */ - OMPI_COMM_CID_INTRA_OOB, /* mode */ - send_first ); /* send or recv first */ - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - /* activate comm and init coll-component */ - rc = ompi_comm_activate ( newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - rport, /* remote leader */ - OMPI_COMM_CID_INTRA_OOB, /* mode */ - send_first, /* send or recv first */ - 0); /* sync_flag */ - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - /* Question: do we have to re-start some low level stuff - to enable the usage of fast communication devices - between the two worlds ? 
- */ - - - exit: - /* done with OOB and such - slow our tick rate again */ - opal_progress(); - opal_progress_event_users_decrement(); - - if ( NULL != rprocs ) { - free ( rprocs ); - } - if ( NULL != proc_list ) { - free ( proc_list ); - } - if ( OMPI_SUCCESS != rc ) { - if ( MPI_COMM_NULL != newcomp && NULL != newcomp ) { - OBJ_RETAIN(newcomp); - newcomp = MPI_COMM_NULL; - } - } - - *newcomm = newcomp; - return rc; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* - * This routine is necessary, since in the connect/accept case, the processes - * executing the connect operation have the OOB contact information of the - * leader of the remote group, however, the processes executing the - * accept get their own port_name = OOB contact information passed in as - * an argument. This is however useless. - * - * Therefore, the two root processes exchange this information at this - * point. 
- * - */ -int ompi_comm_get_rport(orte_process_name_t *port, int send_first, - ompi_proc_t *proc, orte_rml_tag_t tag, - orte_process_name_t *rport_name) -{ - int rc; - orte_std_cntr_t num_vals; - - if ( send_first ) { - orte_buffer_t *sbuf; - - sbuf = OBJ_NEW(orte_buffer_t); - if (NULL == sbuf) { - return OMPI_ERROR; - } - if (ORTE_SUCCESS != (rc = orte_dss.pack(sbuf, &(proc->proc_name), 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(sbuf); - return rc; - } - - rc = orte_rml.send_buffer(port, sbuf, tag, 0); - OBJ_RELEASE(sbuf); - if ( 0 > rc ) { - ORTE_ERROR_LOG(rc); - return rc; - } - - *rport_name = *port; - } else { - orte_buffer_t *rbuf; - - rbuf = OBJ_NEW(orte_buffer_t); - if (NULL == rbuf) { - return ORTE_ERROR; - } - if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, rbuf, tag, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rbuf); - return rc; - } - - num_vals = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(rbuf, rport_name, &num_vals, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rbuf); - return rc; - } - OBJ_RELEASE(rbuf); - } - - return OMPI_SUCCESS; -} - - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -int -ompi_comm_start_processes(int count, char **array_of_commands, - char ***array_of_argv, - int *array_of_maxprocs, - MPI_Info *array_of_info, - char *port_name) -{ - int rc, i, j, counter; - int have_wdir=0; - bool have_prefix; - int valuelen=OMPI_PATH_MAX, flag=0; - char cwd[OMPI_PATH_MAX]; - char host[OMPI_PATH_MAX]; /*** should define OMPI_HOST_MAX ***/ - char prefix[OMPI_PATH_MAX]; - char *base_prefix; - - orte_std_cntr_t num_apps, ai; - orte_jobid_t new_jobid=ORTE_JOBID_INVALID; - orte_app_context_t **apps=NULL; - - opal_list_t attributes; - opal_list_item_t *item; - - bool timing = false; - struct timeval ompistart, ompistop; - int param, value; - - 
/* parse the info object */ - /* check potentially for: - - "host": desired host where to spawn the processes - - "prefix": the path to the root of the directory tree where ompi - executables and libraries can be found - - "arch": desired architecture - - "wdir": directory, where executable can be found - - "path": list of directories where to look for the executable - - "file": filename, where additional information is provided. - - "soft": see page 92 of MPI-2. - */ - - /* make sure the progress engine properly trips the event library */ - opal_progress_event_users_increment(); - - /* check to see if we want timing information */ - param = mca_base_param_reg_int_name("ompi", "timing", - "Request that critical timing loops be measured", - false, false, 0, &value); - if (value != 0) { - timing = true; - if (0 != gettimeofday(&ompistart, NULL)) { - opal_output(0, "ompi_comm_start_procs: could not obtain start time"); - ompistart.tv_sec = 0; - ompistart.tv_usec = 0; - } - } - - /* setup to record the attributes */ - OBJ_CONSTRUCT(&attributes, opal_list_t); - - /* we want to be able to default the prefix to the one used for this job - * so that the ompi executables and libraries can be found. the user can - * later override this value by providing an MPI_Info value. for now, though, - * let's get the default value off the registry - */ - rc = orte_rmgr.get_app_context(orte_process_info.my_name->jobid, &apps, &num_apps); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* we'll just use the prefix from the first member of the app_context array. - * this shouldn't matter as they all should be the same. it could be NULL, of - * course (user might not have specified it), so we need to protect against that. 
- * - * It's possible that no app_contexts are returned (e.g., during a comm_spawn - * from a singleton), so check first - */ - if (NULL != apps && NULL != apps[0]->prefix_dir) { - base_prefix = strdup(apps[0]->prefix_dir); - } else { - base_prefix = NULL; - } - /* cleanup the memory we used */ - if(NULL != apps) { - for (ai = 0; ai < num_apps; ai++) { - OBJ_RELEASE(apps[ai]); - } - free(apps); - } - - /* Convert the list of commands to an array of orte_app_context_t - pointers */ - apps = (orte_app_context_t**)malloc(count * sizeof(orte_app_context_t *)); - if (NULL == apps) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - for (i = 0; i < count; ++i) { - apps[i] = OBJ_NEW(orte_app_context_t); - if (NULL == apps[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - /* rollback what was already done */ - for (j=0; j < i; j++) OBJ_RELEASE(apps[j]); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* copy over the name of the executable */ - apps[i]->app = strdup(array_of_commands[i]); - if (NULL == apps[i]->app) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - /* rollback what was already done */ - for (j=0; j < i; j++) OBJ_RELEASE(apps[j]); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* record the number of procs to be generated */ - apps[i]->num_procs = array_of_maxprocs[i]; - - /* copy over the argv array */ - counter = 1; - - if (MPI_ARGVS_NULL != array_of_argv && - MPI_ARGV_NULL != array_of_argv[i]) { - /* first need to find out how many entries there are */ - j=0; - while (NULL != array_of_argv[i][j]) { - j++; - } - counter += j; - } - - /* now copy them over, ensuring to NULL terminate the array */ - apps[i]->argv = (char**)malloc((1 + counter) * sizeof(char*)); - if (NULL == apps[i]->argv) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - /* rollback what was already done */ - for (j=0; j < i; j++) { - OBJ_RELEASE(apps[j]); - } - 
opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - apps[i]->argv[0] = strdup(array_of_commands[i]); - for (j=1; j < counter; j++) { - apps[i]->argv[j] = strdup(array_of_argv[i][j-1]); - } - apps[i]->argv[counter] = NULL; - - - /* the environment gets set by the launcher - * all we need to do is add the specific values - * needed for comm_spawn - */ - /* Add environment variable with the contact information for the - child processes. - */ - counter = 1; - apps[i]->env = (char**)malloc((1+counter) * sizeof(char*)); - if (NULL == apps[i]->env) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - /* rollback what was already done */ - for (j=0; j < i; j++) OBJ_RELEASE(apps[j]); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - asprintf(&(apps[i]->env[0]), "OMPI_PARENT_PORT=%s", port_name); - apps[i]->env[1] = NULL; - for (j = 0; NULL != environ[j]; ++j) { - if (0 == strncmp("OMPI_", environ[j], 5)) { - opal_argv_append_nosize(&apps[i]->env, environ[j]); - } - } - - /* Check for well-known info keys */ - have_wdir = 0; - have_prefix = false; - if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { - - /* check for 'wdir' */ - ompi_info_get (array_of_info[i], "wdir", valuelen, cwd, &flag); - if ( flag ) { - apps[i]->cwd = strdup(cwd); - have_wdir = 1; - } - - /* check for 'host' */ - ompi_info_get (array_of_info[i], "host", sizeof(host), host, &flag); - if ( flag ) { - apps[i]->num_map = 1; - apps[i]->map_data = (orte_app_context_map_t **) malloc(sizeof(orte_app_context_map_t *)); - apps[i]->map_data[0] = OBJ_NEW(orte_app_context_map_t); - apps[i]->map_data[0]->map_type = ORTE_APP_CONTEXT_MAP_HOSTNAME; - apps[i]->map_data[0]->map_data = strdup(host); - } - - /* 'path', 'arch', 'file', 'soft' -- to be implemented */ - - /* check for 'ompi_prefix' (OMPI-specific -- to effect the same - * behavior as --prefix option to orterun) - */ - ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix), prefix, 
&flag); - if ( flag ) { - apps[i]->prefix_dir = strdup(prefix); - have_prefix = true; - } - } - - /* default value: If the user did not tell us where to look for the - executable, we assume the current working directory */ - if ( !have_wdir ) { - getcwd(cwd, OMPI_PATH_MAX); - apps[i]->cwd = strdup(cwd); - } - - /* if the user told us a new prefix, then we leave it alone. otherwise, if - * a prefix had been provided before, copy that one into the new app_context - * for use by the spawned children - */ - if ( !have_prefix && NULL != base_prefix) { - apps[i]->prefix_dir = strdup(base_prefix); - } - - /* leave the map info alone - the launcher will - * decide where to put things - */ - } /* for (i = 0 ; i < count ; ++i) */ - - /* cleanup */ - if (NULL != base_prefix) { - free(base_prefix); - } - - /* tell the RTE that we want to be the new job to be a child of this process' job */ - if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_NS_USE_PARENT, - ORTE_JOBID, &(orte_process_info.my_name->jobid), - ORTE_RMGR_ATTR_OVERRIDE))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attributes); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - - /* tell the RTE that we want to the children to run inside of our allocation - - * don't go get one just for them - */ - if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RAS_USE_PARENT_ALLOCATION, - ORTE_JOBID, &(orte_process_info.my_name->jobid), - ORTE_RMGR_ATTR_OVERRIDE))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attributes); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - - /* tell the RTE that we want the children mapped the same way as their parent */ - if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMAPS_USE_PARENT_PLAN, - ORTE_JOBID, &(orte_process_info.my_name->jobid), - ORTE_RMGR_ATTR_OVERRIDE))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attributes); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - -#if 0 - /* tell 
the RTE that we want to be cross-connected to the children so we receive - * their ORTE-level information - e.g., OOB contact info - when they - * reach the STG1 stage gate - */ - state = ORTE_PROC_STATE_AT_STG1; - if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMGR_XCONNECT_AT_SPAWN, - ORTE_PROC_STATE, &state, - ORTE_RMGR_ATTR_OVERRIDE))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attributes); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } -#endif - - /* check for timing request - get stop time and report elapsed time if so */ - if (timing) { - if (0 != gettimeofday(&ompistop, NULL)) { - opal_output(0, "ompi_comm_start_procs: could not obtain stop time"); - } else { - opal_output(0, "ompi_comm_start_procs: time from start to prepare to spawn %ld usec", - (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + - (ompistop.tv_usec - ompistart.tv_usec))); - if (0 != gettimeofday(&ompistart, NULL)) { - opal_output(0, "ompi_comm_start_procs: could not obtain new start time"); - ompistart.tv_sec = ompistop.tv_sec; - ompistart.tv_usec = ompistop.tv_usec; - } - } - } - - /* spawn procs */ - rc = orte_rmgr.spawn_job(apps, count, &new_jobid, 0, NULL, NULL, - ORTE_PROC_STATE_NONE, &attributes); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - - /* check for timing request - get stop time and report elapsed time if so */ - if (timing) { - if (0 != gettimeofday(&ompistop, NULL)) { - opal_output(0, "ompi_comm_start_procs: could not obtain stop time"); - } else { - opal_output(0, "ompi_comm_start_procs: time to spawn %ld usec", - (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + - (ompistop.tv_usec - ompistart.tv_usec))); - } - } - - /* clean up */ - opal_progress_event_users_decrement(); - while (NULL != (item = opal_list_remove_first(&attributes))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&attributes); - - for ( i=0; ic_name, MPI_MAX_OBJECT_NAME, 
"MPI_COMM_PARENT"); - } - - return OMPI_SUCCESS; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* this routine runs through the list of communicators and - and does the disconnect for all dynamic communicators */ -int ompi_comm_dyn_finalize (void) -{ - int i,j=0, max=0; - ompi_comm_disconnect_obj **objs=NULL; - ompi_communicator_t *comm=NULL; - - if ( 1 size = ompi_comm_remote_size (comm); - } else { - obj->size = ompi_comm_size (comm); - } - - obj->comm = comm; - obj->reqs = (ompi_request_t **) malloc(2*obj->size*sizeof(ompi_request_t *)); - if ( NULL == obj->reqs ) { - free (obj); - return NULL; - } - - /* initiate all isend_irecvs. We use a dummy buffer stored on - the object, since we are sending zero size messages anyway. */ - for ( i=0; i < obj->size; i++ ) { - ret = MCA_PML_CALL(irecv (&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, comm, - &(obj->reqs[2*i]))); - - if ( OMPI_SUCCESS != ret ) { - free (obj->reqs); - free (obj); - return NULL; - } - - ret = MCA_PML_CALL(isend (&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, - MCA_PML_BASE_SEND_SYNCHRONOUS, - comm, &(obj->reqs[2*i+1]))); - - if ( OMPI_SUCCESS != ret ) { - free (obj->reqs); - free (obj); - return NULL; - } - } - - /* return handle */ - return obj; -} -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* - count how many requests are active - * - generate a request array large enough to hold - all active requests - * - call waitall on the overall request array - * - free the objects - */ -void ompi_comm_disconnect_waitall (int count, ompi_comm_disconnect_obj **objs) -{ - - ompi_request_t **reqs=NULL; - char *treq=NULL; - int totalcount = 
0; - int i; - int ret; - - for (i=0; isize; - } - - reqs = (ompi_request_t **) malloc (2*totalcount*sizeof(ompi_request_t *)); - if ( NULL == reqs ) { - printf("ompi_comm_disconnect_waitall: error allocating memory\n"); - return; - } - - /* generate a single, large array of pending requests */ - treq = (char *)reqs; - for (i=0; ireqs, 2*objs[i]->size * sizeof(ompi_request_t *)); - treq += 2*objs[i]->size * sizeof(ompi_request_t *); - } - - /* force all non-blocking all-to-alls to finish */ - ret = ompi_request_wait_all (2*totalcount, reqs, MPI_STATUSES_IGNORE); - - /* Finally, free everything */ - for (i=0; i< count; i++ ) { - if (NULL != objs[i]->reqs ) { - free (objs[i]->reqs ); - free (objs[i]); - } - } - - free (reqs); - - /* decrease the counter for dynamic communicators by 'count'. - Attention, this approach now requires, that we are just using - these routines for communicators which have been flagged dynamic */ - ompi_comm_num_dyncomm -=count; - - return; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -#define OMPI_COMM_MAXJOBIDS 64 -void ompi_comm_mark_dyncomm (ompi_communicator_t *comm) -{ - int i, j, numjobids=0; - int size, rsize; - int found; - orte_jobid_t jobids[OMPI_COMM_MAXJOBIDS], thisjobid; - ompi_group_t *grp=NULL; - ompi_proc_t *proc = NULL; - - /* special case for MPI_COMM_NULL */ - if ( comm == MPI_COMM_NULL ) { - return; - } - - size = ompi_comm_size (comm); - rsize = ompi_comm_remote_size(comm); - - /* loop over all processes in local group and count number - of different jobids. 
*/ - grp = comm->c_local_group; - for (i=0; i< size; i++) { - proc = ompi_group_peer_lookup(grp,i); - thisjobid = proc->proc_name.jobid; - found = 0; - for ( j=0; jc_remote_group; - for (i=0; i< rsize; i++) { - proc = ompi_group_peer_lookup(grp,i); - thisjobid = proc->proc_name.jobid; - found = 0; - for ( j=0; j 1 ) { - ompi_comm_num_dyncomm++; - OMPI_COMM_SET_DYNAMIC(comm); - } - - return; -} diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index ed43583081..348af04443 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -28,11 +28,11 @@ #include "ompi/mca/pml/pml.h" #include "ompi/mca/coll/base/base.h" #include "ompi/mca/topo/base/base.h" -#include "orte/mca/ns/base/base.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/attribute/attribute.h" #include "ompi/mca/topo/topo.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" /* @@ -210,7 +210,7 @@ int ompi_comm_finalize(void) OBJ_DESTRUCT( &ompi_mpi_comm_self ); /* disconnect all dynamic communicators */ - ompi_comm_dyn_finalize(); + ompi_dpm.dyn_finalize(); /* Shut down MPI_COMM_WORLD */ OBJ_DESTRUCT( &ompi_mpi_comm_world ); diff --git a/ompi/communicator/comm_publish.c b/ompi/communicator/comm_publish.c deleted file mode 100644 index bee7f9e7f0..0000000000 --- a/ompi/communicator/comm_publish.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include - -#include "ompi/communicator/communicator.h" -#include "ompi/proc/proc.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/rml/rml_types.h" - -#define OMPI_COMM_PORT_KEY "ompi-port-name" - - -int ompi_open_port(char *port_name) -{ - ompi_proc_t **myproc=NULL; - char *name=NULL; - size_t size=0; - orte_rml_tag_t lport_id=0; - int rc; - - /* - * The port_name is equal to the OOB-contact information - * and an integer. The reason for adding the integer is - * to make the port unique for multi-threaded scenarios. - */ - - myproc = ompi_proc_self (&size); - if (ORTE_SUCCESS != (rc = orte_ns.get_proc_name_string (&name, &(myproc[0]->proc_name)))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns.assign_rml_tag(&lport_id, NULL))) { - return rc; - } - - sprintf (port_name, "%s:%d", name, lport_id); - free ( myproc ); - free ( name ); - - return OMPI_SUCCESS; -} - -/* takes a port_name and separates it into the process_name - and the tag -*/ -char *ompi_parse_port (char *port_name, orte_rml_tag_t *tag) -{ - char tmp_port[MPI_MAX_PORT_NAME], *tmp_string; - - tmp_string = (char *) malloc (MPI_MAX_PORT_NAME); - if (NULL == tmp_string ) { - return NULL; - } - - strncpy (tmp_port, port_name, MPI_MAX_PORT_NAME); - strncpy (tmp_string, strtok(tmp_port, ":"), MPI_MAX_PORT_NAME); - sscanf( strtok(NULL, ":"),"%d", (int*)tag); - - return tmp_string; -} - -/* - * publish the port_name using the service_name as a token - * jobid and vpid are used later to make - * sure, that only this process can unpublish the information. 
- */ -int ompi_comm_namepublish ( char *service_name, char *port_name ) -{ - orte_gpr_value_t *value; - int rc; - - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_TOKENS_AND | ORTE_GPR_OVERWRITE, - OMPI_NAMESPACE_SEGMENT, 1, 1))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - value->tokens[0] = strdup(service_name); - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), OMPI_COMM_PORT_KEY, ORTE_STRING, port_name))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(value); - return rc; -} - -char* ompi_comm_namelookup ( char *service_name ) -{ - char *token[2], *key[2]; - orte_gpr_keyval_t **keyvals=NULL; - orte_gpr_value_t **values; - orte_std_cntr_t cnt=0; - char *stmp=NULL; - int ret; - - token[0] = service_name; - token[1] = NULL; - - key[0] = strdup(OMPI_COMM_PORT_KEY); - key[1] = NULL; - - ret = orte_gpr.get(ORTE_GPR_TOKENS_AND, OMPI_NAMESPACE_SEGMENT, - token, key, &cnt, &values); - if (ORTE_SUCCESS != ret) { - return NULL; - } - if ( 0 < cnt && NULL != values[0] ) { /* should be only one, if any */ - keyvals = values[0]->keyvals; - stmp = strdup((const char*)keyvals[0]->value->data); - OBJ_RELEASE(values[0]); - } - - return (stmp); -} - -/* - * delete the entry. Just the process who has published - * the service_name, has the right to remove this - * service. 
Will be done later, by adding jobid and vpid - * as tokens - */ -int ompi_comm_nameunpublish ( char *service_name ) -{ - char *token[2]; - - token[0] = service_name; - token[1] = NULL; -#if 0 - return orte_gpr.delete_entries(ORTE_GPR_TOKENS_AND, - OMPI_NAMESPACE_SEGMENT, - token, NULL); -#endif - return OMPI_SUCCESS; -} diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index 6401900f52..2a53bc95ed 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -65,7 +65,6 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); /* a set of special tags: */ /* to recognize an MPI_Comm_join in the comm_connect_accept routine. */ -#define OMPI_COMM_JOIN_TAG -32000 #define OMPI_COMM_ALLGATHER_TAG -31078 #define OMPI_COMM_BARRIER_TAG -31079 @@ -356,7 +355,7 @@ struct ompi_communicator_t { * the OOB version. * This routine has to be thread safe in the final version. */ - int ompi_comm_nextcid ( ompi_communicator_t* newcomm, +OMPI_DECLSPEC int ompi_comm_nextcid ( ompi_communicator_t* newcomm, ompi_communicator_t* oldcomm, ompi_communicator_t* bridgecomm, void* local_leader, @@ -373,7 +372,7 @@ struct ompi_communicator_t { * This is THE routine, where all the communicator stuff * is really set. 
*/ - int ompi_comm_set ( ompi_communicator_t** newcomm, +OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, ompi_communicator_t* oldcomm, int local_size, int *local_ranks, @@ -412,7 +411,7 @@ struct ompi_communicator_t { int high ); - int ompi_comm_activate ( ompi_communicator_t* newcomm, +OMPI_DECLSPEC int ompi_comm_activate ( ompi_communicator_t* newcomm, ompi_communicator_t* oldcomm, ompi_communicator_t* bridgecomm, void* local_leader, @@ -427,35 +426,9 @@ struct ompi_communicator_t { */ int ompi_comm_dump ( ompi_communicator_t *comm ); - /** - * a simple function to determint a port number - */ - int ompi_open_port (char *port_name); - - /** - * takes a port_name and returns the oob-contact information - * and the tag - */ - char * ompi_parse_port (char *port_name, orte_rml_tag_t *tag) ; - - /** - * routines handling name publishing, lookup and unpublishing - */ - int ompi_comm_namepublish ( char *service_name, char *port_name ); - char* ompi_comm_namelookup ( char *service_name ); - int ompi_comm_nameunpublish ( char *service_name ); - - /* setting name */ int ompi_comm_set_name (ompi_communicator_t *comm, char *name ); - /* THE routine for dynamic process management. This routine - sets the connection up between two independent applications. - */ - int ompi_comm_connect_accept ( ompi_communicator_t *comm, int root, - orte_process_name_t *port, int send_first, - ompi_communicator_t **newcomm, orte_rml_tag_t tag); - /* * these are the init and finalize functions for the comm_reg * stuff. These routines are necessary for handling multi-threading @@ -464,59 +437,9 @@ struct ompi_communicator_t { void ompi_comm_reg_init(void); void ompi_comm_reg_finalize(void); - /* start the new processes from MPI_Comm_spawn_multiple. 
Initial - * version, very rough - */ - int ompi_comm_start_processes(int count, char **array_of_commands, - char ***array_of_argv, - int *array_of_maxprocs, - MPI_Info *array_of_info, - char *port_name); - - /* - * This routine checks, whether an application has been spawned - * by another MPI application, or has been independently started. - * If it has been spawned, it establishes the parent communicator. - * Since the routine has to communicate, it should be among the last - * steps in MPI_Init, to be sure that everything is already set up. - */ - int ompi_comm_dyn_init(void); - - /** - * Executes internally a disconnect on all dynamic communicators - * in case the user did not disconnect them. - */ - int ompi_comm_dyn_finalize(void); - - /* this routine counts the number of different jobids of the processes - given in a certain communicator. If there is more than one jobid, - we mark the communicator as 'dynamic'. This is especially relevant - for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have - to wait for all still connected processes. */ + /* global variable to save the number od dynamic communicators */ extern int ompi_comm_num_dyncomm; - void ompi_comm_mark_dyncomm (ompi_communicator_t *comm); - /* the next two routines implement a kind of non-blocking barrier. - the only difference is, that you can wait for the completion - of more than one initiated ibarrier. This is required for waiting - for all still connected processes in MPI_Finalize. - - ompi_comm_disconnect_init returns a handle, which has to be passed in - to ompi_comm_disconnect_waitall. The second routine blocks, until - all non-blocking barriers described by the handles are finished. - The communicators can than be released. 
- */ - - struct ompi_comm_disconnect_obj { - ompi_communicator_t *comm; - int size; - struct ompi_request_t **reqs; - int buf; - }; - typedef struct ompi_comm_disconnect_obj ompi_comm_disconnect_obj; - - ompi_comm_disconnect_obj *ompi_comm_disconnect_init (ompi_communicator_t *comm); - void ompi_comm_disconnect_waitall (int count, ompi_comm_disconnect_obj **objs ); END_C_DECLS diff --git a/ompi/datatype/convertor.c b/ompi/datatype/convertor.c index 70daae2bc8..836ab11f44 100644 --- a/ompi/datatype/convertor.c +++ b/ompi/datatype/convertor.c @@ -24,6 +24,7 @@ #ifdef HAVE_STRINGS_H #include #endif +#include #include "opal/prefetch.h" diff --git a/ompi/datatype/datatype_unpack.c b/ompi/datatype/datatype_unpack.c index 17c9557af0..1884772e16 100644 --- a/ompi/datatype/datatype_unpack.c +++ b/ompi/datatype/datatype_unpack.c @@ -24,6 +24,8 @@ #include "ompi/datatype/convertor_internal.h" #include "ompi/datatype/datatype_internal.h" +#include + #if OMPI_ENABLE_DEBUG extern int ompi_unpack_debug; #define DO_DEBUG(INST) if( ompi_unpack_debug ) { INST } diff --git a/ompi/datatype/dt_module.c b/ompi/datatype/dt_module.c index c272186736..71619ca3a0 100644 --- a/ompi/datatype/dt_module.c +++ b/ompi/datatype/dt_module.c @@ -25,6 +25,7 @@ #include "ompi/datatype/datatype.h" #include "ompi/datatype/datatype_internal.h" #include "ompi/datatype/convertor_internal.h" +#include #if OMPI_ENABLE_DEBUG #include "opal/mca/base/mca_base_param.h" diff --git a/ompi/group/group.h b/ompi/group/group.h index cd0ad089e1..2fc98e9c7a 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -136,7 +136,7 @@ OMPI_DECLSPEC extern ompi_group_t ompi_mpi_group_null; * * @return Pointer to new group structure */ -ompi_group_t *ompi_group_allocate(int group_size); +OMPI_DECLSPEC ompi_group_t *ompi_group_allocate(int group_size); ompi_group_t *ompi_group_allocate_sporadic(int group_size); ompi_group_t *ompi_group_allocate_strided(void); ompi_group_t *ompi_group_allocate_bmap(int orig_group_size, int 
group_size); diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index 336f03f012..c86a72d370 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -286,7 +286,7 @@ typedef int (MPI_Grequest_cancel_function)(void *, int); #define MPI_ARGV_NULL ((char **) 0) /* NULL argument vector */ #define MPI_ARGVS_NULL ((char ***) 0) /* NULL argument vectors */ #define MPI_ERRCODES_IGNORE ((int *) 0) /* don't return error codes */ -#define MPI_MAX_PORT_NAME 36 /* max port name length */ +#define MPI_MAX_PORT_NAME 256 /* max port name length */ #define MPI_MAX_NAME_LEN MPI_MAX_PORT_NAME /* max port name length */ #define MPI_ORDER_C 0 /* C row major order */ #define MPI_ORDER_FORTRAN 1 /* Fortran column major order */ diff --git a/ompi/include/mpif-common.h b/ompi/include/mpif-common.h index 8042bb0554..adeab15dae 100644 --- a/ompi/include/mpif-common.h +++ b/ompi/include/mpif-common.h @@ -133,7 +133,7 @@ parameter (MPI_BSEND_OVERHEAD=128) parameter (MPI_MAX_INFO_KEY=35) parameter (MPI_MAX_INFO_VAL=255) - parameter (MPI_MAX_PORT_NAME=35) + parameter (MPI_MAX_PORT_NAME=255) parameter (MPI_MAX_OBJECT_NAME=63) parameter (MPI_ORDER_C=0) parameter (MPI_ORDER_FORTRAN=1) diff --git a/ompi/include/ompi/constants.h b/ompi/include/ompi/constants.h index 86f543f520..f4fb75b8aa 100644 --- a/ompi/include/ompi/constants.h +++ b/ompi/include/ompi/constants.h @@ -19,7 +19,7 @@ #ifndef OMPI_CONSTANTS_H #define OMPI_CONSTANTS_H -#include "orte/orte_constants.h" +#include "orte/constants.h" #define OMPI_ERR_BASE ORTE_ERR_MAX diff --git a/ompi/info/info.c b/ompi/info/info.c index 5385a7b3e5..37745771ce 100644 --- a/ompi/info/info.c +++ b/ompi/info/info.c @@ -10,6 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -27,6 +28,7 @@ #include #endif #include +#include #include "ompi/constants.h" #include "ompi/info/info.h" @@ -207,6 +209,58 @@ int ompi_info_get (ompi_info_t *info, char *key, int valuelen, } +/* + * Similar to ompi_info_get(), but cast the result into a boolean + * using some well-defined rules. + * + * Leading and trailing whitespace is ignored. A value that starts + * with a digit is converted with atoi(); "yes" and "true" + * (case-insensitive) are true; anything else is false. + * *value is only written when *flag reports that the key exists. + */ +int ompi_info_get_bool(ompi_info_t *info, char *key, bool *value, int *flag) +{ + char *ptr; + char *end; + char str[256]; + + str[sizeof(str) - 1] = '\0'; + ompi_info_get(info, key, sizeof(str) - 1, str, flag); + if (*flag) { + *value = false; + + /* Skip leading whitespace */ + ptr = str; + while ('\0' != *ptr && isspace((unsigned char) *ptr)) { + ++ptr; + } + + /* Trim trailing whitespace. Start from the real end of the + * string: the last byte of the buffer is the '\0' set above, so + * scanning back from there never trims anything. */ + end = ptr; + while ('\0' != *end) { + ++end; + } + while (end > ptr && isspace((unsigned char) end[-1])) { + *--end = '\0'; + } + + if ('\0' != *ptr) { + if (isdigit((unsigned char) *ptr)) { + *value = (bool) atoi(ptr); + } else if (0 == strcasecmp(ptr, "yes") || + 0 == strcasecmp(ptr, "true")) { + *value = true; + } else if (0 != strcasecmp(ptr, "no") && + 0 != strcasecmp(ptr, "false")) { + /* RHC unrecognized value -- print a warning? */ + } + } + } + return MPI_SUCCESS; +} + /* * Delete a key from an info */ diff --git a/ompi/info/info.h b/ompi/info/info.h index c58452f55e..cc70e59b24 100644 --- a/ompi/info/info.h +++ b/ompi/info/info.h @@ -10,6 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -148,6 +149,32 @@ int ompi_info_set (ompi_info_t *info, char *key, char *value); */ int ompi_info_free (ompi_info_t **info); + /** + * Get a (key, value) pair from an 'MPI_Info' object and assign it + * into a boolean output.
+ * + * @param info Pointer to ompi_info_t object + * @param key null-terminated character string of the index key + * @param value Boolean output value + * @param flag true (1) if 'key' defined on 'info', false (0) if not + * (logical) + * + * @retval MPI_SUCCESS + * + * If found, the string value will be cast to the boolean output in + * the following manner: + * + * - If the string value is digits, the return value is "(bool) + * atoi(value)" + * - If the string value is (case-insensitive) "yes" or "true", the + * result is true + * - If the string value is (case-insensitive) "no" or "false", the + * result is false + * - All other values are false + */ +OMPI_DECLSPEC int ompi_info_get_bool (ompi_info_t *info, char *key, bool *value, + int *flag); + /** * Get a (key, value) pair from an 'MPI_Info' object * @@ -163,8 +190,8 @@ int ompi_info_free (ompi_info_t **info); * In C and C++, 'valuelen' should be one less than the allocated * space to allow for for the null terminator. */ -int ompi_info_get (ompi_info_t *info, char *key, int valuelen, - char *value, int *flag); +OMPI_DECLSPEC int ompi_info_get (ompi_info_t *info, char *key, int valuelen, + char *value, int *flag); /** * Delete a (key,value) pair from "info" diff --git a/ompi/mca/bml/r2/bml_r2.c b/ompi/mca/bml/r2/bml_r2.c index c94bd3eaad..28120e3d59 100644 --- a/ompi/mca/bml/r2/bml_r2.c +++ b/ompi/mca/bml/r2/bml_r2.c @@ -24,7 +24,6 @@ #include #include "opal/util/show_help.h" -#include "orte/mca/ns/ns.h" #include "ompi/class/ompi_bitmap.h" #include "ompi/mca/bml/bml.h" #include "ompi/mca/bml/base/base.h" @@ -34,6 +33,7 @@ #include "ompi/mca/bml/base/bml_base_btl.h" #include "bml_r2.h" #include "orte/class/orte_proc_table.h" +#include "orte/util/name_fns.h" #include "ompi/proc/proc.h" extern mca_bml_base_component_t mca_bml_r2_component; @@ -450,9 +450,9 @@ int mca_bml_r2_add_procs( OMPI_ERR_UNREACH == ret) { char *local, *remote; - orte_ns.get_proc_name_string(&local, +
orte_util_convert_process_name_to_string(&local, &(ompi_proc_local_proc->proc_name)); - orte_ns.get_proc_name_string(&remote, + orte_util_convert_process_name_to_string(&remote, &(unreach_proc->proc_name)); opal_show_help("help-mca-bml-r2", diff --git a/ompi/mca/bml/r2/bml_r2_ft.c b/ompi/mca/bml/r2/bml_r2_ft.c index 72ca634f01..8367d94ba1 100644 --- a/ompi/mca/bml/r2/bml_r2_ft.c +++ b/ompi/mca/bml/r2/bml_r2_ft.c @@ -24,7 +24,6 @@ #include #include "opal/util/show_help.h" -#include "orte/mca/ns/ns.h" #include "ompi/runtime/ompi_cr.h" #include "ompi/class/ompi_bitmap.h" #include "ompi/mca/bml/bml.h" @@ -35,9 +34,7 @@ #include "ompi/mca/bml/base/bml_base_btl.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/gpr/gpr.h" #include "orte/class/orte_proc_table.h" #include "ompi/proc/proc.h" @@ -117,9 +114,11 @@ int mca_bml_r2_ft_event(int state) { if( NULL != mca_bml_r2.btl_modules) { free( mca_bml_r2.btl_modules); + mca_bml_r2.btl_modules = NULL; } if( NULL != mca_bml_r2.btl_progress ) { free( mca_bml_r2.btl_progress); + mca_bml_r2.btl_progress = NULL; } opal_output_verbose(10, ompi_cr_output, @@ -163,8 +162,10 @@ int mca_bml_r2_ft_event(int state) { mca_bml_r2.btls_added = false; for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]->proc_bml); - procs[p]->proc_bml = NULL; + if( NULL != procs[p]->proc_bml) { + OBJ_RELEASE(procs[p]->proc_bml); + procs[p]->proc_bml = NULL; + } OBJ_RELEASE(procs[p]); } diff --git a/ompi/mca/btl/base/btl_base_error.c b/ompi/mca/btl/base/btl_base_error.c index 2caac992e6..efa06ca59c 100644 --- a/ompi/mca/btl/base/btl_base_error.c +++ b/ompi/mca/btl/base/btl_base_error.c @@ -25,8 +25,11 @@ #include "base.h" #include "btl_base_error.h" #include "opal/util/show_help.h" + #include "orte/util/sys_info.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" int 
mca_btl_base_verbose; @@ -60,7 +63,7 @@ void mca_btl_base_error_no_nics(const char* transport, char *procid; if (mca_btl_base_warn_component_unused) { /* print out no-nic warning if user told us to */ - asprintf(&procid, "%s", ORTE_NAME_PRINT(orte_process_info.my_name)); + asprintf(&procid, "%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_show_help("help-mpi-btl-base.txt", "btl:no-nics", true, procid, transport, orte_system_info.nodename, diff --git a/ompi/mca/btl/base/btl_base_error.h b/ompi/mca/btl/base/btl_base_error.h index 87d22c2172..42aab4195e 100644 --- a/ompi/mca/btl/base/btl_base_error.h +++ b/ompi/mca/btl/base/btl_base_error.h @@ -28,7 +28,8 @@ #include "orte/util/proc_info.h" #include "orte/util/sys_info.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" OMPI_DECLSPEC extern int mca_btl_base_verbose; @@ -39,7 +40,7 @@ extern int mca_btl_base_out(const char*, ...); do { \ mca_btl_base_out("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, __LINE__, __func__); \ mca_btl_base_out args; \ mca_btl_base_out("\n"); \ @@ -50,7 +51,7 @@ do { \ do { \ mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, __LINE__, __func__); \ mca_btl_base_err args; \ mca_btl_base_err("\n"); \ @@ -59,7 +60,7 @@ do { \ #define BTL_PEER_ERROR(proc, args) \ do { \ mca_btl_base_err("%s[%s:%d:%s] from %s ", \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, __LINE__, __func__, \ orte_system_info.nodename); \ if(proc && proc->proc_hostname) { \ @@ -76,7 +77,7 @@ do { \ if(mca_btl_base_verbose > 0) { \ mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, 
__LINE__, __func__); \ mca_btl_base_err args; \ mca_btl_base_err("\n"); \ @@ -89,13 +90,9 @@ do { \ #endif -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS OMPI_DECLSPEC extern void mca_btl_base_error_no_nics(const char* transport, const char* nic_name); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS diff --git a/ompi/mca/btl/base/btl_base_select.c b/ompi/mca/btl/base/btl_base_select.c index a8c1378608..49fc876972 100644 --- a/ompi/mca/btl/base/btl_base_select.c +++ b/ompi/mca/btl/base/btl_base_select.c @@ -154,7 +154,7 @@ int mca_btl_base_select(bool enable_progress_threads, if (0 == opal_list_get_size(&mca_btl_base_modules_initialized)) { opal_show_help("help-mca-base.txt", "find-available:none-found", true, "btl"); - orte_errmgr.error_detected(1, NULL); + orte_errmgr.abort(1, NULL); } return OMPI_SUCCESS; } diff --git a/ompi/mca/btl/elan/btl_elan_component.c b/ompi/mca/btl/elan/btl_elan_component.c index 56e79b205d..f90986387c 100644 --- a/ompi/mca/btl/elan/btl_elan_component.c +++ b/ompi/mca/btl/elan/btl_elan_component.c @@ -23,6 +23,9 @@ #include "ompi/runtime/ompi_module_exchange.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/runtime/orte_globals.h" +#include "ompi/mca/mpool/base/base.h" + #include "btl_elan.h" #include "btl_elan_frag.h" #include "btl_elan_endpoint.h" @@ -225,6 +228,8 @@ mca_btl_elan_component_init( int *num_btl_modules, mca_btl_elan_component.elan_free_list_inc, NULL ); /* use default allocator */ + vpid = ORTE_PROC_MY_NAME->vpid; + ompi_modex_send( &mca_btl_elan_component.super.btl_version, &vpid, sizeof(vpid)); diff --git a/ompi/mca/btl/elan/btl_elan_endpoint.c b/ompi/mca/btl/elan/btl_elan_endpoint.c index 6edb00cbe4..241005622a 100644 --- a/ompi/mca/btl/elan/btl_elan_endpoint.c +++ b/ompi/mca/btl/elan/btl_elan_endpoint.c @@ -14,11 +14,10 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include 
"orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "btl_elan.h" #include "btl_elan_endpoint.h" #include "btl_elan_proc.h" diff --git a/ompi/mca/btl/elan/btl_elan_proc.h b/ompi/mca/btl/elan/btl_elan_proc.h index 162be777dd..5a439b48fe 100644 --- a/ompi/mca/btl/elan/btl_elan_proc.h +++ b/ompi/mca/btl/elan/btl_elan_proc.h @@ -12,7 +12,6 @@ #ifndef MCA_BTL_ELAN_PROC_H #define MCA_BTL_ELAN_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_elan.h" diff --git a/ompi/mca/btl/gm/btl_gm_component.c b/ompi/mca/btl/gm/btl_gm_component.c index fc779ba8f8..42bdad47c4 100644 --- a/ompi/mca/btl/gm/btl_gm_component.c +++ b/ompi/mca/btl/gm/btl_gm_component.c @@ -44,6 +44,8 @@ #include "ompi/datatype/convertor.h" #include "btl_gm_endpoint.h" #include "orte/util/proc_info.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" #include "ompi/runtime/ompi_module_exchange.h" @@ -440,7 +442,7 @@ static int mca_btl_gm_discover( void ) "%s gm_port %08lX, " "board %" PRIu32 ", global %" PRIu32 " " "node %" PRIu32 "port %" PRIu32 "\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long) port, board_no, global_id, node_id, port_no); } diff --git a/ompi/mca/btl/gm/btl_gm_endpoint.c b/ompi/mca/btl/gm/btl_gm_endpoint.c index 8e4b74dbec..3d964a7433 100644 --- a/ompi/mca/btl/gm/btl_gm_endpoint.c +++ b/ompi/mca/btl/gm/btl_gm_endpoint.c @@ -21,11 +21,6 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" #include "btl_gm.h" #include "btl_gm_endpoint.h" #include "btl_gm_proc.h" diff --git a/ompi/mca/btl/gm/btl_gm_proc.c b/ompi/mca/btl/gm/btl_gm_proc.c index cb2b11d3cf..476fa4ddee 100644 --- a/ompi/mca/btl/gm/btl_gm_proc.c +++ 
b/ompi/mca/btl/gm/btl_gm_proc.c @@ -25,6 +25,9 @@ #include #endif +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "btl_gm.h" #include "btl_gm_proc.h" @@ -191,7 +194,7 @@ int mca_btl_gm_proc_insert( if(mca_btl_gm_component.gm_debug > 0) { opal_output(0, "%s mapped global id %" PRIu32 " to node id %" PRIu32 "\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), gm_endpoint->endpoint_addr.global_id, gm_endpoint->endpoint_addr.node_id); } diff --git a/ompi/mca/btl/gm/btl_gm_proc.h b/ompi/mca/btl/gm/btl_gm_proc.h index d3cd7fd470..ac23f28346 100644 --- a/ompi/mca/btl/gm/btl_gm_proc.h +++ b/ompi/mca/btl/gm/btl_gm_proc.h @@ -19,7 +19,6 @@ #ifndef MCA_BTL_GM_PROC_H #define MCA_BTL_GM_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_gm.h" diff --git a/ompi/mca/btl/mx/btl_mx_endpoint.c b/ompi/mca/btl/mx/btl_mx_endpoint.c index bc0a07068f..2b749f52ee 100644 --- a/ompi/mca/btl/mx/btl_mx_endpoint.c +++ b/ompi/mca/btl/mx/btl_mx_endpoint.c @@ -21,10 +21,6 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/errmgr/errmgr.h" #include "btl_mx.h" #include "btl_mx_endpoint.h" #include "btl_mx_proc.h" diff --git a/ompi/mca/btl/mx/btl_mx_proc.c b/ompi/mca/btl/mx/btl_mx_proc.c index dbd9535b4a..8fa0991231 100644 --- a/ompi/mca/btl/mx/btl_mx_proc.c +++ b/ompi/mca/btl/mx/btl_mx_proc.c @@ -19,6 +19,7 @@ #include "ompi_config.h" #include "opal/class/opal_hash_table.h" +#include "orte/util/name_fns.h" #include "ompi/runtime/ompi_module_exchange.h" #include "btl_mx.h" diff --git a/ompi/mca/btl/mx/btl_mx_proc.h b/ompi/mca/btl/mx/btl_mx_proc.h index f5aa8f4d9f..037af79239 100644 --- a/ompi/mca/btl/mx/btl_mx_proc.h +++ b/ompi/mca/btl/mx/btl_mx_proc.h @@ -19,7 +19,6 @@ #ifndef MCA_BTL_MX_PROC_H #define MCA_BTL_MX_PROC_H -#include "orte/mca/ns/ns.h" 
#include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_mx.h" diff --git a/ompi/mca/btl/ofud/btl_ofud_component.c b/ompi/mca/btl/ofud/btl_ofud_component.c index bdc0297a8d..7ce5c9dd48 100644 --- a/ompi/mca/btl/ofud/btl_ofud_component.c +++ b/ompi/mca/btl/ofud/btl_ofud_component.c @@ -34,6 +34,8 @@ #include "ompi/mca/mpool/rdma/mpool_rdma.h" #include "ompi/runtime/ompi_module_exchange.h" +#include "orte/runtime/orte_globals.h" + #include "btl_ofud.h" #include "btl_ofud_frag.h" #include "btl_ofud_endpoint.h" @@ -280,7 +282,7 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules, *num_btl_modules = 0; num_devs = 0; - seedv[0] = orte_process_info.my_name->vpid; + seedv[0] = ORTE_PROC_MY_NAME->vpid; seedv[1] = opal_sys_timer_get_cycles(); seedv[2] = opal_sys_timer_get_cycles(); seed48(seedv); diff --git a/ompi/mca/btl/ofud/btl_ofud_proc.h b/ompi/mca/btl/ofud/btl_ofud_proc.h index c9994f8afa..ea4b5f448d 100644 --- a/ompi/mca/btl/ofud/btl_ofud_proc.h +++ b/ompi/mca/btl/ofud/btl_ofud_proc.h @@ -22,7 +22,6 @@ #define MCA_BTL_UD_PROC_H #include "opal/class/opal_object.h" -#include "orte/mca/ns/ns.h" #include "ompi/proc/proc.h" #include "btl_ofud.h" diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c index 29b3e27e28..2ba9346999 100644 --- a/ompi/mca/btl/openib/btl_openib.c +++ b/ompi/mca/btl/openib/btl_openib.c @@ -57,6 +57,7 @@ #ifdef HAVE_SYS_RESOURCE_H #include #endif +#include mca_btl_openib_module_t mca_btl_openib_module = { { diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index 6b4440a7e2..a979eae7bc 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -46,6 +46,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/sys_info.h" +#include "orte/runtime/orte_globals.h" #include "ompi/proc/proc.h" #include "ompi/mca/pml/pml.h" @@ -1315,7 +1316,7 @@ btl_openib_component_init(int 
*num_btl_modules, *num_btl_modules = 0; num_devs = 0; - seedv[0] = orte_process_info.my_name->vpid; + seedv[0] = ORTE_PROC_MY_NAME->vpid; seedv[1] = opal_sys_timer_get_cycles(); seedv[2] = opal_sys_timer_get_cycles(); seed48(seedv); diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c index 164176a36d..ae639421cb 100644 --- a/ompi/mca/btl/openib/btl_openib_endpoint.c +++ b/ompi/mca/btl/openib/btl_openib_endpoint.c @@ -30,11 +30,9 @@ #include #include -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" #include "ompi/types.h" #include "ompi/mca/pml/base/pml_base_sendreq.h" diff --git a/ompi/mca/btl/openib/btl_openib_proc.h b/ompi/mca/btl/openib/btl_openib_proc.h index d0f35f9c01..f23239fe55 100644 --- a/ompi/mca/btl/openib/btl_openib_proc.h +++ b/ompi/mca/btl/openib/btl_openib_proc.h @@ -20,7 +20,6 @@ #ifndef MCA_BTL_IB_PROC_H #define MCA_BTL_IB_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_openib.h" diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c b/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c index d8175a6e37..3009c90b54 100644 --- a/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c +++ b/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c @@ -22,11 +22,14 @@ #include "ompi_config.h" -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + +#include "ompi/mca/dpm/dpm.h" #include "btl_openib.h" #include "btl_openib_endpoint.h" @@ -59,10 +62,10 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, uint8_t message_type); static void rml_send_cb(int status, orte_process_name_t* endpoint, - 
orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); static void rml_recv_cb(int status, orte_process_name_t* process_name, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); /* @@ -107,7 +110,7 @@ static int oob_init(void) int rc; rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_OPENIB, + OMPI_RML_TAG_OPENIB, ORTE_RML_PERSISTENT, rml_recv_cb, NULL); @@ -158,7 +161,7 @@ static int oob_query(mca_btl_openib_hca_t *hca) */ static int oob_finalize(void) { - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_OPENIB); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB); return OMPI_SUCCESS; } @@ -416,7 +419,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp, static int send_connect_data(mca_btl_base_endpoint_t* endpoint, uint8_t message_type) { - orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t); + opal_buffer_t* buffer = OBJ_NEW(opal_buffer_t); int rc; if (NULL == buffer) { @@ -425,15 +428,15 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, } /* pack the info in the send buffer */ - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT8)); - rc = orte_dss.pack(buffer, &message_type, 1, ORTE_UINT8); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8)); + rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT64)); - rc = orte_dss.pack(buffer, &endpoint->subnet_id, 1, ORTE_UINT64); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64)); + rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -441,16 +444,16 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, if (message_type != ENDPOINT_CONNECT_REQUEST) { /* send the QP connect request info we respond to */ - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = 
orte_dss.pack(buffer, + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->rem_info.rem_qps[0].rem_qp_num, 1, - ORTE_UINT32); + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16)); - rc = orte_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, ORTE_UINT16); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16)); + rc = opal_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -461,37 +464,37 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, int qp; /* stuff all the QP info into the buffer */ for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num, - 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num, + 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1, - ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, ORTE_UINT16); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16)); + rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1, - ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, 
&endpoint->endpoint_btl->hca->mtu, 1, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->index, 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -500,7 +503,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, /* send to remote endpoint */ rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, - buffer, ORTE_RML_TAG_OPENIB, 0, + buffer, OMPI_RML_TAG_OPENIB, 0, rml_send_cb, NULL); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -519,7 +522,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint, * remote peer */ static void rml_send_cb(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { OBJ_RELEASE(buffer); @@ -532,7 +535,7 @@ static void rml_send_cb(int status, orte_process_name_t* endpoint, * otherwise try to modify QP's and establish reliable connection */ static void rml_recv_cb(int status, orte_process_name_t* process_name, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { mca_btl_openib_proc_t *ib_proc; @@ -548,29 +551,29 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name, /* start by unpacking data first so we know who is knocking at our door */ - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT8)); - rc = orte_dss.unpack(buffer, &message_type, &cnt, ORTE_UINT8); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8)); + rc = opal_dss.unpack(buffer, &message_type, &cnt, OPAL_UINT8); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT64)); - rc = orte_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, ORTE_UINT64); + 
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64)); + rc = opal_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, OPAL_UINT64); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } if (ENDPOINT_CONNECT_REQUEST != message_type) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &lcl_qp, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &lcl_qp, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16)); - rc = orte_dss.unpack(buffer, &lcl_lid, &cnt, ORTE_UINT16); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16)); + rc = opal_dss.unpack(buffer, &lcl_lid, &cnt, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; @@ -585,36 +588,36 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name, /* unpack all the qp info */ for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt, - ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_psn, &cnt, - ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_psn, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16)); - rc = orte_dss.unpack(buffer, &rem_info.rem_lid, &cnt, ORTE_UINT16); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16)); + rc = opal_dss.unpack(buffer, &rem_info.rem_lid, &cnt, OPAL_UINT16); if (ORTE_SUCCESS != rc) { 
ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &rem_info.rem_index, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &rem_info.rem_index, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; @@ -625,7 +628,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name, rem_info.rem_lid, rem_info.rem_subnet_id)); - master = orte_ns.compare_fields(ORTE_NS_CMP_ALL, orte_process_info.my_name, + master = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, process_name) > 0 ? true : false; for (ib_proc = (mca_btl_openib_proc_t*) @@ -635,8 +638,8 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name, ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) { bool found = false; - if (orte_ns.compare_fields(ORTE_NS_CMP_ALL, - &ib_proc->proc_guid, process_name) != ORTE_EQUAL) { + if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &ib_proc->proc_guid, process_name) != OPAL_EQUAL) { continue; } diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c index b52479722a..2345339414 100644 --- a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c +++ b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c @@ -10,11 +10,11 @@ #include "ompi_config.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/oob/base/base.h" +#include "orte/util/name_fns.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" +#include "ompi/mca/dpm/dpm.h" 
#include "btl_openib.h" #include "btl_openib_endpoint.h" @@ -55,8 +55,6 @@ typedef enum { ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE /* The xrc recv qp already was destroyed */ } connect_message_type_t; -#define XOOB_TAG (ORTE_RML_TAG_DYNAMIC - 1) - #define XOOB_SET_REMOTE_INFO(EP, INFO) \ do { \ /* copy the rem_info stuff */ \ @@ -79,7 +77,7 @@ static int xoob_priority = 60; * remote peer */ static void xoob_rml_send_cb(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { OBJ_RELEASE(buffer); @@ -87,29 +85,29 @@ static void xoob_rml_send_cb(int status, orte_process_name_t* endpoint, /* Receive connect information to remote endpoint */ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *lid, - uint8_t *message_type, orte_buffer_t* buffer) + uint8_t *message_type, opal_buffer_t* buffer) { int cnt = 1, rc, srq; /* Recv standart header */ - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT8)); - rc = orte_dss.unpack(buffer, message_type, &cnt, ORTE_UINT8); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8)); + rc = opal_dss.unpack(buffer, message_type, &cnt, OPAL_UINT8); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; } BTL_VERBOSE(("Recv unpack Message type = %d", *message_type)); - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT64)); - rc = orte_dss.unpack(buffer, &info->rem_subnet_id, &cnt, ORTE_UINT64); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64)); + rc = opal_dss.unpack(buffer, &info->rem_subnet_id, &cnt, OPAL_UINT64); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; } BTL_VERBOSE(("Recv unpack sid = %d", info->rem_subnet_id)); - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16)); - rc = orte_dss.unpack(buffer, &info->rem_lid, &cnt, ORTE_UINT16); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16)); + rc = opal_dss.unpack(buffer, &info->rem_lid, &cnt, OPAL_UINT16); if 
(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -121,26 +119,26 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * */ if (ENDPOINT_XOOB_CONNECT_REQUEST == *message_type || ENDPOINT_XOOB_CONNECT_RESPONSE == *message_type) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt, - ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; } BTL_VERBOSE(("Recv unpack remote qp = %x", info->rem_qps->rem_qp_num)); - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_qps->rem_psn, &cnt, - ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_qps->rem_psn, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; } BTL_VERBOSE(("Recv unpack remote psn = %d", info->rem_qps->rem_psn)); - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_mtu, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_mtu, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -151,8 +149,8 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * if (ENDPOINT_XOOB_CONNECT_REQUEST == *message_type || ENDPOINT_XOOB_CONNECT_XRC_REQUEST == *message_type) { /* unpack requested lid info */ - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16)); - rc = orte_dss.unpack(buffer, lid, &cnt, ORTE_UINT16); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16)); + rc = opal_dss.unpack(buffer, lid, &cnt, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -162,10 +160,10 @@ static int 
xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * /* Unpack requested recv qp number */ if (ENDPOINT_XOOB_CONNECT_XRC_REQUEST == *message_type) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); /* In XRC request case we will use rem_qp_num as container for requested qp number */ - rc = orte_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt, - ORTE_UINT32); + rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -175,8 +173,8 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * if (ENDPOINT_XOOB_CONNECT_RESPONSE == *message_type || ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == *message_type) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_index, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_index, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -184,8 +182,8 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * BTL_VERBOSE(("Recv unpack remote index = %d", info->rem_index)); for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) { - BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32)); - rc = orte_dss.unpack(buffer, &info->rem_srqs[srq].rem_srq_num, &cnt, ORTE_UINT32); + BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32)); + rc = opal_dss.unpack(buffer, &info->rem_srqs[srq].rem_srq_num, &cnt, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return OMPI_ERROR; @@ -202,7 +200,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t * static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, uint8_t message_type) { - orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t); + opal_buffer_t* buffer = 
OBJ_NEW(opal_buffer_t); int rc, srq; if (NULL == buffer) { @@ -217,24 +215,24 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, */ /* pack the info in the send buffer */ BTL_VERBOSE(("Send pack Message type = %d", message_type)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT8)); - rc = orte_dss.pack(buffer, &message_type, 1, ORTE_UINT8); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8)); + rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } BTL_VERBOSE(("Send pack sid = %d", endpoint->subnet_id)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT64)); - rc = orte_dss.pack(buffer, &endpoint->subnet_id, 1, ORTE_UINT64); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64)); + rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } BTL_VERBOSE(("Send pack lid = %d", endpoint->endpoint_btl->lid)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, ORTE_UINT16); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16)); + rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -260,24 +258,24 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, /* stuff all the QP info into the buffer */ /* we need to send only one QP */ BTL_VERBOSE(("Send pack qp num = %x", qp_num)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &qp_num, 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &qp_num, 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } BTL_VERBOSE(("Send pack lpsn = %d", psn)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &psn, 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = 
opal_dss.pack(buffer, &psn, 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } BTL_VERBOSE(("Send pack mtu = %d", endpoint->endpoint_btl->hca->mtu)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1, - ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1, + OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -293,8 +291,8 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, /* when we are sending request we add remote lid that we want to connect */ BTL_VERBOSE(("Send pack remote lid = %d", endpoint->ib_addr->lid)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16)); - rc = orte_dss.pack(buffer, &endpoint->ib_addr->lid, 1, ORTE_UINT16); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16)); + rc = opal_dss.pack(buffer, &endpoint->ib_addr->lid, 1, OPAL_UINT16); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -305,9 +303,9 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, * recv qp number that we want to connect. 
*/ if (ENDPOINT_XOOB_CONNECT_XRC_REQUEST == message_type) { BTL_VERBOSE(("Send pack remote qp = %x", endpoint->ib_addr->remote_xrc_rcv_qp_num)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->ib_addr->remote_xrc_rcv_qp_num, - 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->ib_addr->remote_xrc_rcv_qp_num, + 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -322,8 +320,8 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == message_type) { /* we need to send the endpoint index for immidate send */ BTL_VERBOSE(("Send pack index = %d", endpoint->index)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->index, 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -331,9 +329,9 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, /* on response we add all SRQ numbers */ for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) { BTL_VERBOSE(("Send pack srq[%d] num = %d", srq, endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num)); - BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32)); - rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num, - 1, ORTE_UINT32); + BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32)); + rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num, + 1, OPAL_UINT32); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -343,7 +341,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint, /* send to remote endpoint */ rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, - buffer, XOOB_TAG, 0, + buffer, OMPI_RML_TAG_XOPENIB, 0, xoob_rml_send_cb, 
NULL); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -642,8 +640,8 @@ static mca_btl_openib_endpoint_t* xoob_find_endpoint(orte_process_name_t* proces ib_proc != (mca_btl_openib_proc_t*) opal_list_get_end(&mca_btl_openib_component.ib_procs); ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) { - if (orte_ns.compare_fields(ORTE_NS_CMP_ALL, - &ib_proc->proc_guid, process_name) == ORTE_EQUAL) { + if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &ib_proc->proc_guid, process_name) == OPAL_EQUAL) { found = true; break; } @@ -753,7 +751,7 @@ static void free_rem_info(mca_btl_openib_rem_info_t *rem_info) * otherwise try to modify QP's and establish reliable connection */ static void xoob_rml_recv_cb(int status, orte_process_name_t* process_name, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { int rc; @@ -947,7 +945,7 @@ static int xoob_init(void) int rc; rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - XOOB_TAG, + OMPI_RML_TAG_XOPENIB, ORTE_RML_PERSISTENT, xoob_rml_recv_cb, NULL); @@ -1018,6 +1016,6 @@ static int xoob_start_connect(mca_btl_base_endpoint_t *endpoint) */ static int xoob_finalize(void) { - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, XOOB_TAG); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_XOPENIB); return OMPI_SUCCESS; } diff --git a/ompi/mca/btl/sctp/btl_sctp_component.c b/ompi/mca/btl/sctp/btl_sctp_component.c index dd61d236ff..150b5ef14e 100644 --- a/ompi/mca/btl/sctp/btl_sctp_component.c +++ b/ompi/mca/btl/sctp/btl_sctp_component.c @@ -48,7 +48,6 @@ #include "opal/util/argv.h" #include "opal/util/output.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/ns/ns_types.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" diff --git a/ompi/mca/btl/sctp/btl_sctp_endpoint.c b/ompi/mca/btl/sctp/btl_sctp_endpoint.c index 60a441e683..5a9bf09a97 100644 --- a/ompi/mca/btl/sctp/btl_sctp_endpoint.c +++ b/ompi/mca/btl/sctp/btl_sctp_endpoint.c @@ -55,6 +55,7 @@ 
#include "ompi/types.h" #include "ompi/mca/btl/base/btl_base_error.h" +#include "orte/util/name_fns.h" #include "btl_sctp.h" #include "btl_sctp_endpoint.h" #include "btl_sctp_proc.h" @@ -567,7 +568,6 @@ bool mca_btl_sctp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct /* 1 to 1 */ mca_btl_sctp_addr_t* btl_addr; mca_btl_sctp_proc_t* this_proc = mca_btl_sctp_proc_local(); - orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL; int cmpval; OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock); @@ -576,7 +576,7 @@ bool mca_btl_sctp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct btl_addr->addr_inet.s_addr == addr->sin_addr.s_addr) { mca_btl_sctp_proc_t *endpoint_proc = btl_endpoint->endpoint_proc; - cmpval = orte_ns.compare_fields(mask, + cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &endpoint_proc->proc_ompi->proc_name, &this_proc->proc_ompi->proc_name); if((btl_endpoint->endpoint_sd < 0) || diff --git a/ompi/mca/btl/sctp/btl_sctp_proc.h b/ompi/mca/btl/sctp/btl_sctp_proc.h index ab5560810c..79d4dc0fa6 100644 --- a/ompi/mca/btl/sctp/btl_sctp_proc.h +++ b/ompi/mca/btl/sctp/btl_sctp_proc.h @@ -20,7 +20,6 @@ #define MCA_BTL_SCTP_PROC_H #include "opal/class/opal_object.h" -#include "orte/mca/ns/ns.h" #include "ompi/proc/proc.h" #include "btl_sctp.h" #include "btl_sctp_addr.h" diff --git a/ompi/mca/btl/sctp/btl_sctp_recv_handler.c b/ompi/mca/btl/sctp/btl_sctp_recv_handler.c index 372b84848f..5007e00c1a 100644 --- a/ompi/mca/btl/sctp/btl_sctp_recv_handler.c +++ b/ompi/mca/btl/sctp/btl_sctp_recv_handler.c @@ -43,7 +43,6 @@ #include "opal/util/argv.h" #include "opal/util/output.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/ns/ns_types.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" diff --git a/ompi/mca/btl/sm/btl_sm_component.c b/ompi/mca/btl/sm/btl_sm_component.c index f0361afb78..5c38a1c27a 100644 --- a/ompi/mca/btl/sm/btl_sm_component.c +++ b/ompi/mca/btl/sm/btl_sm_component.c @@ -45,6 +45,8 @@ #include 
"opal/util/output.h" #include "orte/util/sys_info.h" #include "orte/util/proc_info.h" +#include "orte/runtime/orte_globals.h" + #include "ompi/mca/pml/pml.h" #include "opal/mca/base/mca_base_param.h" #include "ompi/runtime/ompi_module_exchange.h" @@ -263,7 +265,7 @@ mca_btl_base_module_t** mca_btl_sm_component_init( /* create a named pipe to receive events */ sprintf( mca_btl_sm_component.sm_fifo_path, "%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir, - (unsigned long)orte_process_info.my_name->vpid ); + (unsigned long)ORTE_PROC_MY_NAME->vpid ); if(mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) { opal_output(0, "mca_btl_sm_component_init: mkfifo failed with errno=%d\n",errno); return NULL; diff --git a/ompi/mca/btl/tcp/btl_tcp_component.c b/ompi/mca/btl/tcp/btl_tcp_component.c index d00ebc6b89..d477be7014 100644 --- a/ompi/mca/btl/tcp/btl_tcp_component.c +++ b/ompi/mca/btl/tcp/btl_tcp_component.c @@ -50,7 +50,7 @@ #include "opal/util/argv.h" #include "opal/util/output.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" diff --git a/ompi/mca/btl/tcp/btl_tcp_endpoint.c b/ompi/mca/btl/tcp/btl_tcp_endpoint.c index 4f18ac5a21..684da05bc1 100644 --- a/ompi/mca/btl/tcp/btl_tcp_endpoint.c +++ b/ompi/mca/btl/tcp/btl_tcp_endpoint.c @@ -344,7 +344,7 @@ bool mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, return false; } - cmpval = orte_ns.compare_fields(ORTE_NS_CMP_ALL, + cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &endpoint_proc->proc_ompi->proc_name, &this_proc->proc_ompi->proc_name); if((btl_endpoint->endpoint_sd < 0) || @@ -475,7 +475,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en } ORTE_PROCESS_NAME_NTOH(guid); /* compare this to the expected values */ - if (0 != orte_ns.compare_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) { + if (OPAL_EQUAL != 
orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) { BTL_ERROR(("received unexpected process identifier %s", ORTE_NAME_PRINT(&guid))); mca_btl_tcp_endpoint_close(btl_endpoint); diff --git a/ompi/mca/btl/tcp/btl_tcp_proc.h b/ompi/mca/btl/tcp/btl_tcp_proc.h index 711ba8d750..47f6afbbd1 100644 --- a/ompi/mca/btl/tcp/btl_tcp_proc.h +++ b/ompi/mca/btl/tcp/btl_tcp_proc.h @@ -20,8 +20,8 @@ #define MCA_BTL_TCP_PROC_H #include "opal/class/opal_object.h" -#include "orte/mca/ns/ns.h" #include "ompi/proc/proc.h" +#include "orte/types.h" #include "btl_tcp.h" #include "btl_tcp_addr.h" #include "btl_tcp_endpoint.h" diff --git a/ompi/mca/btl/template/btl_template_endpoint.c b/ompi/mca/btl/template/btl_template_endpoint.c index 8d9ef33ccf..5e8561d4c9 100644 --- a/ompi/mca/btl/template/btl_template_endpoint.c +++ b/ompi/mca/btl/template/btl_template_endpoint.c @@ -21,11 +21,10 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "btl_template.h" #include "btl_template_endpoint.h" #include "btl_template_proc.h" diff --git a/ompi/mca/btl/template/btl_template_proc.h b/ompi/mca/btl/template/btl_template_proc.h index 3577b7208b..85d162a969 100644 --- a/ompi/mca/btl/template/btl_template_proc.h +++ b/ompi/mca/btl/template/btl_template_proc.h @@ -19,7 +19,6 @@ #ifndef MCA_BTL_TEMPLATE_PROC_H #define MCA_BTL_TEMPLATE_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_template.h" diff --git a/ompi/mca/btl/udapl/btl_udapl.h b/ompi/mca/btl/udapl/btl_udapl.h index 399e24f23f..0e7aa69bfb 100644 --- a/ompi/mca/btl/udapl/btl_udapl.h +++ b/ompi/mca/btl/udapl/btl_udapl.h @@ -226,7 +226,7 @@ do { \ if (verbose_level <= mca_btl_udapl_component.udapl_verbosity) { \ mca_btl_base_out("[%s]%s[%s:%d:%s] ", \ 
orte_system_info.nodename, \ - ORTE_NAME_PRINT(orte_process_info.my_name), \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ __FILE__, __LINE__, __func__); \ mca_btl_base_out args; \ mca_btl_base_out("\n"); \ diff --git a/ompi/mca/btl/udapl/btl_udapl_endpoint.c b/ompi/mca/btl/udapl/btl_udapl_endpoint.c index fd432b54a1..6c89e20893 100644 --- a/ompi/mca/btl/udapl/btl_udapl_endpoint.c +++ b/ompi/mca/btl/udapl/btl_udapl_endpoint.c @@ -28,14 +28,17 @@ #include "ompi/types.h" #include "opal/include/opal/align.h" #include "opal/util/show_help.h" -#include "orte/mca/ns/base/base.h" + #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "opal/class/opal_pointer_array.h" + #include "ompi/class/ompi_free_list.h" #include "ompi/mca/mpool/rdma/mpool_rdma.h" +#include "ompi/mca/dpm/dpm.h" + #include "ompi/mca/btl/base/btl_base_error.h" #include "btl_udapl.h" #include "btl_udapl_endpoint.h" @@ -44,14 +47,14 @@ #include "btl_udapl_proc.h" static void mca_btl_udapl_endpoint_send_cb(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint); static int mca_btl_udapl_endpoint_post_recv(mca_btl_udapl_endpoint_t* endpoint, size_t size); void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint); void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); static int mca_btl_udapl_endpoint_finish_eager(mca_btl_udapl_endpoint_t*); static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t*); @@ -272,7 +275,7 @@ int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint, static void mca_btl_udapl_endpoint_send_cb(int status, orte_process_name_t* endpoint, - orte_buffer_t* 
buffer, orte_rml_tag_t tag, void* cbdata) + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { OBJ_RELEASE(buffer); } @@ -467,7 +470,7 @@ int mca_btl_udapl_endpoint_create(mca_btl_udapl_module_t* btl, static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) { mca_btl_udapl_addr_t* addr = &endpoint->endpoint_btl->udapl_addr; - orte_buffer_t* buf = OBJ_NEW(orte_buffer_t); + opal_buffer_t* buf = OBJ_NEW(opal_buffer_t); int rc; if(NULL == buf) { @@ -478,13 +481,13 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), 1); /* Pack our address information */ - rc = orte_dss.pack(buf, &addr->port, 1, ORTE_UINT64); + rc = opal_dss.pack(buf, &addr->port, 1, OPAL_UINT64); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_dss.pack(buf, &addr->addr, sizeof(DAT_SOCK_ADDR), ORTE_UINT8); + rc = opal_dss.pack(buf, &addr->addr, sizeof(DAT_SOCK_ADDR), OPAL_UINT8); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -492,7 +495,7 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) /* Send the buffer */ rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, buf, - ORTE_RML_TAG_UDAPL, 0, mca_btl_udapl_endpoint_send_cb, NULL); + OMPI_RML_TAG_UDAPL, 0, mca_btl_udapl_endpoint_send_cb, NULL); if(0 > rc) { ORTE_ERROR_LOG(rc); return rc; @@ -504,7 +507,7 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, - orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { mca_btl_udapl_addr_t addr; mca_btl_udapl_proc_t* proc; @@ -514,14 +517,14 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, int rc; /* Unpack data */ - rc = orte_dss.unpack(buffer, &addr.port, &cnt, ORTE_UINT64); + rc = opal_dss.unpack(buffer, &addr.port, &cnt, 
OPAL_UINT64); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; } cnt = sizeof(mca_btl_udapl_addr_t); - rc = orte_dss.unpack(buffer, &addr.addr, &cnt, ORTE_UINT8); + rc = opal_dss.unpack(buffer, &addr.addr, &cnt, OPAL_UINT8); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return; @@ -535,7 +538,7 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, opal_list_get_end(&mca_btl_udapl_component.udapl_procs); proc = (mca_btl_udapl_proc_t*)opal_list_get_next(proc)) { - if(ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &proc->proc_guid, endpoint)) { + if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &proc->proc_guid, endpoint)) { for(i = 0; i < proc->proc_endpoint_count; i++) { ep = proc->proc_endpoints[i]; @@ -561,7 +564,7 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, void mca_btl_udapl_endpoint_post_oob_recv(void) { - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_UDAPL, + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, OMPI_RML_TAG_UDAPL, ORTE_RML_PERSISTENT, mca_btl_udapl_endpoint_recv, NULL); } @@ -577,7 +580,7 @@ void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint) /* Nasty test to prevent deadlock and unwanted connection attempts */ /* This right here is the whole point of using the ORTE/RML handshake */ if((MCA_BTL_UDAPL_CONN_EAGER == endpoint->endpoint_state && - 0 > orte_ns.compare_fields(ORTE_NS_CMP_ALL, + 0 > orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &endpoint->endpoint_proc->proc_guid, &ompi_proc_local()->proc_name)) || (MCA_BTL_UDAPL_CLOSED != endpoint->endpoint_state && @@ -715,7 +718,7 @@ static int mca_btl_udapl_endpoint_finish_eager( } /* Only one side does dat_ep_connect() */ - if(0 < orte_ns.compare_fields(ORTE_NS_CMP_ALL, + if(0 < orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &endpoint->endpoint_proc->proc_guid, &ompi_proc_local()->proc_name)) { diff --git a/ompi/mca/btl/udapl/btl_udapl_proc.h b/ompi/mca/btl/udapl/btl_udapl_proc.h index 
3a9b882dd9..1dd97f7635 100644 --- a/ompi/mca/btl/udapl/btl_udapl_proc.h +++ b/ompi/mca/btl/udapl/btl_udapl_proc.h @@ -20,7 +20,6 @@ #ifndef MCA_BTL_UDAPL_PROC_H #define MCA_BTL_UDAPL_PROC_H -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" #include "ompi/proc/proc.h" #include "btl_udapl.h" diff --git a/ompi/mca/coll/sm/coll_sm.h b/ompi/mca/coll/sm/coll_sm.h index 2eabb91d21..640fcf3a4f 100644 --- a/ompi/mca/coll/sm/coll_sm.h +++ b/ompi/mca/coll/sm/coll_sm.h @@ -24,7 +24,7 @@ #include "mpi.h" #include "opal/mca/mca.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/mpool/mpool.h" #include "ompi/mca/common/sm/common_sm_mmap.h" diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index 9d1c1949cd..938e889245 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -43,7 +43,10 @@ #include "opal/mca/maffinity/maffinity.h" #include "opal/mca/maffinity/base/base.h" #include "opal/util/os_path.h" -#include "orte/mca/ns/ns.h" + +#include "orte/util/proc_info.h" +#include "orte/util/name_fns.h" + #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/base.h" @@ -598,7 +601,7 @@ static int bootstrap_comm(ompi_communicator_t *comm, empty_index = -1; for (i = 0; i < mca_coll_sm_component.sm_bootstrap_num_segments; ++i) { if (comm->c_contextid == bshe->smbhe_keys[i].mcsbck_cid && - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, rank0, &bshe->smbhe_keys[i].mcsbck_rank0_name)) { found = true; diff --git a/ompi/mca/common/portals/common_portals.c b/ompi/mca/common/portals/common_portals.c index 421ba6510d..76f1223e0c 100644 --- a/ompi/mca/common/portals/common_portals.c +++ b/ompi/mca/common/portals/common_portals.c @@ -17,7 +17,7 @@ */ #include "ompi_config.h" - +#include "ompi/constants.h" #include "common_portals.h" diff 
--git a/ompi/mca/common/sm/common_sm_mmap.c b/ompi/mca/common/sm/common_sm_mmap.c index 5fbf011152..b67084f5cd 100644 --- a/ompi/mca/common/sm/common_sm_mmap.c +++ b/ompi/mca/common/sm/common_sm_mmap.c @@ -51,6 +51,8 @@ #include "orte/mca/rml/rml_types.h" #include "orte/mca/rml/base/base.h" +#include "ompi/mca/dpm/dpm.h" + OBJ_CLASS_INSTANCE( mca_common_sm_mmap_t, opal_object_t, @@ -179,13 +181,13 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name, /* signal the rest of the local procs that the backing file has been created */ for(p=1 ; p < n_local_procs ; p++ ) { - sm_file_created=ORTE_RML_TAG_SM_BACK_FILE_CREATED; + sm_file_created=OMPI_RML_TAG_SM_BACK_FILE_CREATED; iov[0].iov_base=&sm_file_created; iov[0].iov_len=sizeof(sm_file_created); iov[1].iov_base=&sm_file_inited; iov[1].iov_len=sizeof(sm_file_inited); rc=orte_rml.send(&(procs[p]->proc_name),iov,2, - ORTE_RML_TAG_SM_BACK_FILE_CREATED,0); + OMPI_RML_TAG_SM_BACK_FILE_CREATED,0); if( rc < 0 ) { opal_output(0, "mca_common_sm_mmap_init: orte_rml.send failed to %lu with errno=%d\n", @@ -205,7 +207,7 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name, iov[1].iov_base=&sm_file_inited; iov[1].iov_len=sizeof(sm_file_inited); rc=orte_rml.recv(&(procs[0]->proc_name),iov,2, - ORTE_RML_TAG_SM_BACK_FILE_CREATED,0); + OMPI_RML_TAG_SM_BACK_FILE_CREATED,0); if( rc < 0 ) { opal_output(0, "mca_common_sm_mmap_init: orte_rml.recv failed from %ld with errno=%d\n", 0L, errno); diff --git a/ompi/mca/crcp/base/base.h b/ompi/mca/crcp/base/base.h index 942a2e3315..b8de8892a0 100644 --- a/ompi/mca/crcp/base/base.h +++ b/ompi/mca/crcp/base/base.h @@ -22,7 +22,7 @@ #include "ompi/constants.h" #include "orte/mca/rml/rml.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "ompi/mca/crcp/crcp.h" diff --git a/ompi/mca/crcp/base/crcp_base_fns.c b/ompi/mca/crcp/base/crcp_base_fns.c index 5ac7aad1c6..abc1f914aa 100644 --- a/ompi/mca/crcp/base/crcp_base_fns.c +++ 
b/ompi/mca/crcp/base/crcp_base_fns.c @@ -31,9 +31,6 @@ #include "opal/util/output.h" #include "opal/util/os_dirpath.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/gpr/gpr.h" - #include "ompi/communicator/communicator.h" #include "ompi/proc/proc.h" #include "opal/mca/base/mca_base_param.h" diff --git a/ompi/mca/crcp/coord/crcp_coord.h b/ompi/mca/crcp/coord/crcp_coord.h index bdaa174380..3027dbac02 100644 --- a/ompi/mca/crcp/coord/crcp_coord.h +++ b/ompi/mca/crcp/coord/crcp_coord.h @@ -29,7 +29,6 @@ #include "opal/mca/mca.h" #include "ompi/mca/crcp/crcp.h" #include "ompi/communicator/communicator.h" -#include "orte/mca/ns/ns.h" #include "opal/runtime/opal_cr.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" diff --git a/ompi/mca/crcp/coord/crcp_coord_btl.h b/ompi/mca/crcp/coord/crcp_coord_btl.h index 03ed8b3b21..51d05fe00b 100644 --- a/ompi/mca/crcp/coord/crcp_coord_btl.h +++ b/ompi/mca/crcp/coord/crcp_coord_btl.h @@ -29,7 +29,6 @@ #include "opal/mca/mca.h" #include "ompi/mca/crcp/crcp.h" #include "ompi/communicator/communicator.h" -#include "orte/mca/ns/ns.h" #include "opal/runtime/opal_cr.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" diff --git a/ompi/mca/crcp/coord/crcp_coord_pml.c b/ompi/mca/crcp/coord/crcp_coord_pml.c index d6c1b282ec..8251501afd 100644 --- a/ompi/mca/crcp/coord/crcp_coord_pml.c +++ b/ompi/mca/crcp/coord/crcp_coord_pml.c @@ -171,8 +171,11 @@ #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" #include "ompi/request/request.h" #include "ompi/datatype/dt_arch.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" #include "ompi/mca/pml/base/pml_base_request.h" @@ -279,7 +282,7 @@ static int recv_bookmarks(int peer_idx); */ static void recv_bookmarks_cbfunc(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, 
orte_rml_tag_t tag, void* cbdata); static int total_recv_bookmarks = 0; @@ -367,7 +370,7 @@ static int ft_event_post_drain_acks(void); */ static void drain_message_ack_cbfunc(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata); @@ -766,7 +769,7 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_state_t, #define PACK_BUFFER(buffer, var, count, type, error_msg) \ { \ - if (OMPI_SUCCESS != (ret = orte_dss.pack(buffer, &(var), count, type)) ) { \ + if (OMPI_SUCCESS != (ret = opal_dss.pack(buffer, &(var), count, type)) ) { \ opal_output(mca_crcp_coord_component.super.output_handle, \ "%s (Return %d)", error_msg, ret); \ exit_status = ret; \ @@ -777,7 +780,7 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_state_t, #define UNPACK_BUFFER(buffer, var, count, type, error_msg) \ { \ orte_std_cntr_t n = count; \ - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &(var), &n, type)) ) { \ + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(var), &n, type)) ) { \ opal_output(mca_crcp_coord_component.super.output_handle, \ "%s (Return %d)", error_msg, ret); \ exit_status = ret; \ @@ -2703,6 +2706,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_ft_event( ompi_crcp_base_pml_state_t* pml_state) { static int step_to_return_to = 0; + opal_list_item_t* item = NULL; int exit_status = OMPI_SUCCESS; int ret; @@ -2782,6 +2786,19 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_ft_event( goto DONE; } + /* + * Refresh the jobids + */ + for(item = opal_list_get_first(&ompi_crcp_coord_pml_peer_refs); + item != opal_list_get_end(&ompi_crcp_coord_pml_peer_refs); + item = opal_list_get_next(item) ) { + ompi_crcp_coord_pml_peer_ref_t *cur_peer_ref; + cur_peer_ref = (ompi_crcp_coord_pml_peer_ref_t*)item; + + /* JJH - Assuming only one global jobid at the moment */ + cur_peer_ref->proc_name.jobid = ORTE_PROC_MY_NAME->jobid; + } + /* * Finish the coord protocol */ @@ -2832,9 +2849,9 @@ static ompi_crcp_coord_pml_peer_ref_t * 
find_peer(orte_process_name_t proc) ompi_crcp_coord_pml_peer_ref_t *cur_peer_ref; cur_peer_ref = (ompi_crcp_coord_pml_peer_ref_t*)item; - if( 0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, - &(cur_peer_ref->proc_name), - &proc) ) { + if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &(cur_peer_ref->proc_name), + &proc) ) { return cur_peer_ref; } } @@ -2993,7 +3010,7 @@ static int ft_event_coordinate_peers(void) if( stall_for_completion ) { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: ft_event_coordinate_peers: %s **** STALLING ***", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); step_to_return_to = 1; exit_status = OMPI_SUCCESS; goto DONE; @@ -3019,7 +3036,7 @@ static int ft_event_coordinate_peers(void) OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle, "crcp:coord: ft_event_coordinate_peers: %s Coordination Finished...\n", - ORTE_NAME_PRINT(orte_process_info.my_name) )); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* * Now that all our peer channels are marked as drained @@ -3099,7 +3116,7 @@ static int ft_event_finalize_exchange(void) static int ft_event_exchange_bookmarks(void) { int peer_idx = 0; - int my_idx = orte_process_info.my_name->vpid; + int my_idx = ORTE_PROC_MY_NAME->vpid; int iter = 0; int num_peers = 0; @@ -3144,13 +3161,13 @@ static int ft_event_check_bookmarks(void) int p_n_from_p_m = 0; if( 10 <= mca_crcp_coord_component.super.verbose ) { - sleep(orte_process_info.my_name->vpid); + sleep(ORTE_PROC_MY_NAME->vpid); OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "Process %s Match Table", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "%s %5s | %7s | %7s | %7s | %7s |", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), "Vpid", "T_Send", "M_Recv", "M_Send", "T_Recv")); 
for(item = opal_list_get_first(&ompi_crcp_coord_pml_peer_refs); @@ -3176,7 +3193,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "%s %5d | %7d | %7d | %7d | %7d |", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), peer_ref->proc_name.vpid, t_send, m_recv, m_send, t_recv)); } @@ -3193,14 +3210,14 @@ static int ft_event_check_bookmarks(void) ompi_crcp_coord_pml_peer_ref_t *peer_ref; peer_ref = (ompi_crcp_coord_pml_peer_ref_t*)item; - if( 0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, - (orte_process_info.my_name), - &(peer_ref->proc_name)) ) { + if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + (ORTE_PROC_MY_NAME), + &(peer_ref->proc_name)) ) { continue; } /* Lowest Rank sends first */ - if( orte_process_info.my_name->vpid < peer_ref->proc_name.vpid ) { + if( ORTE_PROC_MY_NAME->vpid < peer_ref->proc_name.vpid ) { /******************** * Check P_n --> P_m * Has the peer received all the messages that I have put on the wire? @@ -3218,7 +3235,7 @@ static int ft_event_check_bookmarks(void) "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: Peer received more than was sent. :(\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3232,7 +3249,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d). Peer needs %4d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3269,7 +3286,7 @@ static int ft_event_check_bookmarks(void) "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). 
" " WARNING: I received more than the peer sent. :(\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3283,7 +3300,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: check_bookmarks: %s <-- %s " "Received Msgs (%4d) = Sent Msgs (%4d). I need %4d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3321,7 +3338,7 @@ static int ft_event_check_bookmarks(void) "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: I received more than the peer sent. :(\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3335,7 +3352,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: check_bookmarks: %s <-- %s " "Received Msgs (%4d) = Sent Msgs (%4d). I need %4d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3371,7 +3388,7 @@ static int ft_event_check_bookmarks(void) "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: Peer received more than was sent. :(\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3385,7 +3402,7 @@ static int ft_event_check_bookmarks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d). 
Peer needs %4d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, @@ -3433,7 +3450,7 @@ static int ft_event_post_drain_acks(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drain_ack: %s Wait on %d Drain ACK Messages.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)req_size)); /* @@ -3453,7 +3470,7 @@ static int ft_event_post_drain_acks(void) NULL) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drain_acks: %s Failed to post a RML receive to the peer\n", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); return ret; } } @@ -3463,7 +3480,7 @@ static int ft_event_post_drain_acks(void) static void drain_message_ack_cbfunc(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata) { @@ -3474,7 +3491,7 @@ static void drain_message_ack_cbfunc(int status, /* * Unpack the buffer */ - UNPACK_BUFFER(buffer, ckpt_status, 1, ORTE_SIZE, ""); + UNPACK_BUFFER(buffer, ckpt_status, 1, OPAL_SIZE, ""); /* * Update the outstanding message queue @@ -3488,13 +3505,14 @@ static void drain_message_ack_cbfunc(int status, /* If this ACK has not completed yet */ if(!drain_msg_ack->complete) { /* If it is the correct peer */ - if(drain_msg_ack->peer.jobid == sender->jobid && - drain_msg_ack->peer.vpid == sender->vpid ) { + if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &(drain_msg_ack->peer), + sender) ) { /* We found it! 
*/ drain_msg_ack->complete = true; OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle, "crcp:coord: drain_message_ack_cbfunc: %s --> %s Received ACK of FLUSH from peer\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender) )); return; } @@ -3503,7 +3521,7 @@ static void drain_message_ack_cbfunc(int status, opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: drain_message_ack_cbfunc: %s --> %s ERROR: Uable to match ACK to peer\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender) ); cleanup: @@ -3523,7 +3541,7 @@ static int ft_event_post_drained(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drained: %s Draining %d Messages.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)req_size)); /* @@ -3544,7 +3562,7 @@ static int ft_event_post_drained(void) if( drain_msg->already_posted ) { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drained: %s Found a message that we don't need to post.\n", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); continue; } /* @@ -3553,7 +3571,7 @@ static int ft_event_post_drained(void) else { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drained: %s Posting a message to be drained from %d.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), drain_msg->rank)); if( OMPI_SUCCESS != (ret = wrapped_pml_module->pml_irecv(drain_msg->buffer, (drain_msg->count * drain_msg->ddt_size), @@ -3564,7 +3582,7 @@ static int ft_event_post_drained(void) &(drain_msg->request) ) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: post_drained: %s Failed to post the Draining PML iRecv\n", - ORTE_NAME_PRINT(orte_process_info.my_name) ); + 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); return ret; } } @@ -3584,7 +3602,7 @@ static int ft_event_wait_quiesce(void) if( OMPI_SUCCESS != (ret = wait_quiesce_drained() ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce: %s Failed to quiesce drained messages\n", - ORTE_NAME_PRINT(orte_process_info.my_name) ); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); exit_status = ret; goto cleanup; } @@ -3595,7 +3613,7 @@ static int ft_event_wait_quiesce(void) if( OMPI_SUCCESS != (ret = wait_quiesce_drain_ack() ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce: %s Failed to recv all drain ACKs\n", - ORTE_NAME_PRINT(orte_process_info.my_name) ); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); exit_status = ret; goto cleanup; } @@ -3628,7 +3646,7 @@ static int wait_quiesce_drained(void) OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce_drained: %s Waiting on %d messages to drain\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)req_size)); /* @@ -3683,13 +3701,13 @@ static int wait_quiesce_drained(void) if( drain_msg->already_posted && NULL == drain_msg->request) { OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce_drained: %s - %s Already posted this msg.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(drain_msg->proc_name)) )); } else { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce_drained: %s - %s Waiting on message. 
(index = %d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(drain_msg->proc_name)), (int)wait_any_count)); @@ -3704,8 +3722,9 @@ static int wait_quiesce_drained(void) /* Add proc to response queue if it is not already there */ found = false; for(i = 0; i < last_proc_idx; ++i) { - if(proc_names[i].jobid == drain_msg->proc_name.jobid && - proc_names[i].vpid == drain_msg->proc_name.vpid ) { + if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &(proc_names[i]), + &(drain_msg->proc_name) ) ) { found = true; break; } @@ -3713,7 +3732,7 @@ static int wait_quiesce_drained(void) if( !found ) { OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce: %s - %s Add process to response list [idx %d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(drain_msg->proc_name)), (int)last_proc_idx)); @@ -3741,19 +3760,19 @@ static int wait_quiesce_drained(void) */ OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce: %s Send ACKs to all Peers\n", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); for(i = 0; i < last_proc_idx; ++i) { - orte_buffer_t *buffer = NULL; + opal_buffer_t *buffer = NULL; size_t response = 1; /* Send All Clear to Peer */ - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = OMPI_ERROR; goto cleanup; } - PACK_BUFFER(buffer, response, 1, ORTE_SIZE, ""); + PACK_BUFFER(buffer, response, 1, OPAL_SIZE, ""); if ( 0 > ( ret = orte_rml.send_buffer(&(proc_names[i]), buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { exit_status = ret; @@ -3821,7 +3840,7 @@ static int coord_request_wait_all( size_t count, OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: request_wait_all: %s Done with idx %d of %d\n", - 
ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)i, (int)count)); } @@ -3858,7 +3877,7 @@ static int wait_quiesce_drain_ack(void) OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: wait_quiesce_drain_ack: %s Waiting on %d Drain ACK messages\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_outstanding)); while(0 < num_outstanding) { @@ -3892,7 +3911,7 @@ static int send_bookmarks(int peer_idx) { ompi_crcp_coord_pml_peer_ref_t *peer_ref; orte_process_name_t peer_name; - orte_buffer_t *buffer = NULL; + opal_buffer_t *buffer = NULL; int exit_status = OMPI_SUCCESS; int ret; @@ -3900,7 +3919,7 @@ static int send_bookmarks(int peer_idx) /* * Find the peer structure for this peer */ - peer_name.jobid = orte_process_info.my_name->jobid; + peer_name.jobid = ORTE_PROC_MY_NAME->jobid; peer_name.vpid = peer_idx; if( NULL == (peer_ref = find_peer(peer_name))) { @@ -3913,7 +3932,7 @@ static int send_bookmarks(int peer_idx) OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: send_bookmarks: %s -> %s Sending bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer_name), peer_ref->total_send_msgs, peer_ref->total_isend_msgs, @@ -3925,23 +3944,23 @@ static int send_bookmarks(int peer_idx) /* * Send the bookmarks to peer */ - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = OMPI_ERROR; goto cleanup; } - PACK_BUFFER(buffer, (peer_ref->total_send_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_send_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_send_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_isend_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_isend_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack 
total_isend_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_send_init_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_send_init_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_send_init_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_recv_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_recv_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_recv_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_irecv_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_irecv_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_irecv_msgs"); - PACK_BUFFER(buffer, (peer_ref->total_recv_init_msgs), 1, ORTE_UINT32, + PACK_BUFFER(buffer, (peer_ref->total_recv_init_msgs), 1, OPAL_UINT32, "crcp:coord: send_bookmarks: Unable to pack total_recv_init_msgs"); if ( 0 > ( ret = orte_rml.send_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { @@ -3975,7 +3994,7 @@ static int recv_bookmarks(int peer_idx) START_TIMER(CRCP_TIMER_CKPT_PEER_R); - peer_name.jobid = orte_process_info.my_name->jobid; + peer_name.jobid = ORTE_PROC_MY_NAME->jobid; peer_name.vpid = peer_idx; if ( 0 > (ret = orte_rml.recv_buffer_nb(&peer_name, @@ -4005,7 +4024,7 @@ static int recv_bookmarks(int peer_idx) { ompi_crcp_coord_pml_peer_ref_t *peer_ref; orte_process_name_t peer_name; - orte_buffer_t * buffer = NULL; + opal_buffer_t * buffer = NULL; int exit_status = OMPI_SUCCESS; int ret, tmp_int; @@ -4014,7 +4033,7 @@ static int recv_bookmarks(int peer_idx) /* * Find the peer structure for this peer */ - peer_name.jobid = orte_process_info.my_name->jobid; + peer_name.jobid = ORTE_PROC_MY_NAME->jobid; peer_name.vpid = peer_idx; if( NULL == (peer_ref = find_peer(peer_name))) { @@ -4028,7 +4047,7 @@ static int recv_bookmarks(int peer_idx) /* * Receive the bookmark from peer */ - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = ORTE_ERROR; goto cleanup; } @@ 
-4042,29 +4061,29 @@ static int recv_bookmarks(int peer_idx) goto cleanup; } - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_send_msgs"); peer_ref->matched_send_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_isend_msgs"); peer_ref->matched_isend_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_send_init_msgs"); peer_ref->matched_send_init_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_recv_msgs"); peer_ref->matched_recv_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_irecv_msgs"); peer_ref->matched_irecv_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_recv_init_msgs"); peer_ref->matched_recv_init_msgs = tmp_int; OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_bookmarks: %s <- %s Received bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer_name), peer_ref->matched_send_msgs, peer_ref->matched_isend_msgs, @@ -4087,7 +4106,7 @@ static int recv_bookmarks(int peer_idx) static void recv_bookmarks_cbfunc(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata) { @@ -4111,29 +4130,29 @@ static void recv_bookmarks_cbfunc(int status, goto cleanup; } - UNPACK_BUFFER(buffer, tmp_int, 1, 
ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_send_msgs"); peer_ref->matched_send_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_isend_msgs"); peer_ref->matched_isend_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_send_init_msgs"); peer_ref->matched_send_init_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_recv_msgs"); peer_ref->matched_recv_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_irecv_msgs"); peer_ref->matched_irecv_msgs = tmp_int; - UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, "crcp:coord: recv_bookmarks: Unable to unpack total_recv_init_msgs"); peer_ref->matched_recv_init_msgs = tmp_int; OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_bookmarks: %s <- %s Received bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender), peer_ref->matched_send_msgs, peer_ref->matched_isend_msgs, @@ -4189,7 +4208,7 @@ static int send_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if(OMPI_SUCCESS != (ret = do_send_msg_detail(peer_ref, msg_ref, &found_match, &finished)) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: send_msg_details: %s --> %s Failed to send message details to peer. 
Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); } @@ -4240,7 +4259,7 @@ static int send_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, opal_list_append(&drained_msg_ack_list, &(d_msg_ack->super)); OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: send_msg_details: %s <--> %s Will wait on ACK from this peer.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)))); /* @@ -4261,7 +4280,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, bool *found_match, bool *finished) { - orte_buffer_t *buffer = NULL; + opal_buffer_t *buffer = NULL; int32_t req_more = -1; int comm_my_rank = -1; int exit_status = OMPI_SUCCESS; @@ -4275,7 +4294,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, buffer = NULL; } - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = OMPI_ERROR; goto cleanup; } @@ -4287,9 +4306,9 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, */ comm_my_rank = ompi_comm_rank(msg_ref->comm); - PACK_BUFFER(buffer, msg_ref->comm->c_contextid, 1, ORTE_UINT32, + PACK_BUFFER(buffer, msg_ref->comm->c_contextid, 1, OPAL_UINT32, "crcp:coord: send_msg_details: Unable to pack communicator ID"); - PACK_BUFFER(buffer, comm_my_rank, 1, ORTE_INT, + PACK_BUFFER(buffer, comm_my_rank, 1, OPAL_INT, "crcp:coord: send_msg_details: Unable to pack comm rank ID"); /* @@ -4298,11 +4317,11 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, * - Message count * - Message Datatype size */ - PACK_BUFFER(buffer, msg_ref->tag, 1, ORTE_INT, + PACK_BUFFER(buffer, msg_ref->tag, 1, OPAL_INT, "crcp:coord: send_msg_details: Unable to pack tag"); - PACK_BUFFER(buffer, msg_ref->count, 1, ORTE_SIZE, + PACK_BUFFER(buffer, msg_ref->count, 1, 
OPAL_SIZE, "crcp:coord: send_msg_details: Unable to pack count"); - PACK_BUFFER(buffer, msg_ref->ddt_size, 1, ORTE_SIZE, + PACK_BUFFER(buffer, msg_ref->ddt_size, 1, OPAL_SIZE, "crcp:coord: send_msg_details: Unable to pack datatype size"); /* @@ -4327,7 +4346,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, /* * Check return value from peer to see if we found a match. */ - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = ORTE_ERROR; goto cleanup; } @@ -4339,14 +4358,14 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: do_send_msg_detail: %s --> %s Failed to receive ACK buffer from peer. Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; goto cleanup; } - UNPACK_BUFFER(buffer, req_more, 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, req_more, 1, OPAL_UINT32, "crcp:coord: send_msg_details: Failed to unpack the ACK from peer buffer."); /* Mark message as matched */ @@ -4411,7 +4430,7 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_details: %s <-- %s " "Failed to receive message detail from peer. Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4431,7 +4450,7 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_details: %s <-- %s " "Failed to check message detail from peer. 
Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4458,7 +4477,7 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if(OMPI_SUCCESS != (ret = do_recv_msg_detail_resp(peer_ref, response))) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_details: %s <-- %s Failed to respond to peer. Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4475,11 +4494,11 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, int *rank, uint32_t *comm_id, int *tag, size_t *count, size_t *datatype_size) { - orte_buffer_t * buffer = NULL; + opal_buffer_t * buffer = NULL; int exit_status = OMPI_SUCCESS; int ret; - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = ORTE_ERROR; goto cleanup; } @@ -4490,7 +4509,7 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: do_recv_msg_detail: %s <-- %s Failed to receive buffer from peer. 
Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4498,17 +4517,17 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, } /* Pull out the communicator ID */ - UNPACK_BUFFER(buffer, (*comm_id), 1, ORTE_UINT32, + UNPACK_BUFFER(buffer, (*comm_id), 1, OPAL_UINT32, "crcp:coord: recv_msg_details: Failed to unpack the communicator ID"); - UNPACK_BUFFER(buffer, (*rank), 1, ORTE_INT, + UNPACK_BUFFER(buffer, (*rank), 1, OPAL_INT, "crcp:coord: recv_msg_details: Failed to unpack the communicator rank ID"); /* Pull out the message details */ - UNPACK_BUFFER(buffer, (*tag), 1, ORTE_INT, + UNPACK_BUFFER(buffer, (*tag), 1, OPAL_INT, "crcp:coord: recv_msg_details: Failed to unpack the tag"); - UNPACK_BUFFER(buffer, (*count), 1, ORTE_SIZE, + UNPACK_BUFFER(buffer, (*count), 1, OPAL_SIZE, "crcp:coord: recv_msg_details: Failed to unpack the count"); - UNPACK_BUFFER(buffer, (*datatype_size), 1, ORTE_SIZE, + UNPACK_BUFFER(buffer, (*datatype_size), 1, OPAL_SIZE, "crcp:coord: recv_msg_details: Failed to unpack the datatype size"); cleanup: @@ -4552,7 +4571,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s -- %s " "Failed to determine if we have received this message. 
Return %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; @@ -4562,7 +4581,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((20, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s -- %s" " found %s, complete %s, posted %s, peer_rank=[%d vs %d]\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer_ref->proc_name)), (true == msg_found ? "True " : "False"), (true == msg_complete ? "True " : "False"), @@ -4580,7 +4599,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s Found a message that needs to be drained\n", - ORTE_NAME_PRINT(orte_process_info.my_name) )); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* * Construct a message for draining @@ -4639,7 +4658,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s " "Found a message already posted! Prepare to drain.\n", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* * If this is the current blocking recv, @@ -4650,7 +4669,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s " "Found a message already posted! 
Prepare to STALL.\n", - ORTE_NAME_PRINT(orte_process_info.my_name))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); stall_for_completion = true; } /* @@ -4661,7 +4680,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: %s " "Found a message already posted! No stall required [%3d, %3d, %3d, %3d].\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)current_msg_id, (int)current_msg_type, (int)posted_msg_ref->msg_id, @@ -4679,7 +4698,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, * messages. * JJH -- When do we use this? */ - if (posted_msg_ref->rank != peer_ref->proc_name.vpid) { + if (posted_msg_ref->rank != (int)peer_ref->proc_name.vpid) { posted_msg_ref->suggested_rank = rank; } @@ -4709,7 +4728,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, else { opal_output(mca_crcp_coord_component.super.output_handle, "crcp:coord: recv_msg_detail_check: ***** ERROR ***** %s Failed to find an action to use. 
This should never happen!\n", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); exit_status = OMPI_ERROR; goto cleanup; } @@ -4989,16 +5008,16 @@ static int find_message_named(opal_list_t * search_list, static int do_recv_msg_detail_resp(ompi_crcp_coord_pml_peer_ref_t *peer_ref, int resp) { - orte_buffer_t * buffer = NULL; + opal_buffer_t * buffer = NULL; int exit_status = OMPI_SUCCESS; int ret; - if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) { + if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { exit_status = OMPI_ERROR; goto cleanup; } - PACK_BUFFER(buffer, resp, 1, ORTE_UINT32, + PACK_BUFFER(buffer, resp, 1, OPAL_UINT32, "crcp:coord: recv_msg_details: Unable to ask peer for more messages"); if ( 0 > ( ret = orte_rml.send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { @@ -5066,7 +5085,7 @@ static void display_all_timers(int state) { static void display_indv_timer(int idx, int var) { double diff = timer_end[idx] - timer_start[idx]; - if( 0 != orte_process_info.my_name->vpid ) { + if( 0 != ORTE_PROC_MY_NAME->vpid ) { return; } diff --git a/ompi/mca/crcp/coord/crcp_coord_pml.h b/ompi/mca/crcp/coord/crcp_coord_pml.h index 42b2d29316..17237e5cf1 100644 --- a/ompi/mca/crcp/coord/crcp_coord_pml.h +++ b/ompi/mca/crcp/coord/crcp_coord_pml.h @@ -29,7 +29,6 @@ #include "opal/mca/mca.h" #include "ompi/mca/crcp/crcp.h" #include "ompi/communicator/communicator.h" -#include "orte/mca/ns/ns.h" #include "opal/runtime/opal_cr.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" diff --git a/orte/mca/ns/Makefile.am b/ompi/mca/dpm/Makefile.am similarity index 73% rename from orte/mca/ns/Makefile.am rename to ompi/mca/dpm/Makefile.am index e28ba7e551..60a87eace4 100644 --- a/orte/mca/ns/Makefile.am +++ b/ompi/mca/dpm/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana # University 
Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University @@ -17,22 +17,22 @@ # # main library setup -noinst_LTLIBRARIES = libmca_ns.la -libmca_ns_la_SOURCES = +noinst_LTLIBRARIES = libmca_dpm.la +libmca_dpm_la_SOURCES = # header setup -nobase_orte_HEADERS = +nobase_ompi_HEADERS = # local files -headers = ns.h ns_types.h -libmca_ns_la_SOURCES += $(headers) +headers = dpm.h +libmca_dpm_la_SOURCES += $(headers) # Conditionally install the header files if WANT_INSTALL_HEADERS -nobase_orte_HEADERS += $(headers) -ortedir = $(includedir)/openmpi/orte/mca/ns +nobase_ompi_HEADERS += $(headers) +ompidir = $(includedir)/openmpi/ompi/mca/dpm else -ortedir = $(includedir) +ompidir = $(includedir) endif include base/Makefile.am diff --git a/ompi/mca/dpm/base/Makefile.am b/ompi/mca/dpm/base/Makefile.am new file mode 100644 index 0000000000..f6474cdf4c --- /dev/null +++ b/ompi/mca/dpm/base/Makefile.am @@ -0,0 +1,29 @@ +# +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_pkgdata_DATA = base/help-ompi-dpm-base.txt + +headers += \ + base/base.h + +libmca_dpm_la_SOURCES += \ + base/dpm_base_open.c \ + base/dpm_base_close.c \ + base/dpm_base_select.c \ + base/dpm_base_common_fns.c + diff --git a/ompi/mca/dpm/base/base.h b/ompi/mca/dpm/base/base.h new file mode 100644 index 0000000000..d91257ef2e --- /dev/null +++ b/ompi/mca/dpm/base/base.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef OMPI_MCA_DPM_BASE_H +#define OMPI_MCA_DPM_BASE_H + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include "ompi/mca/dpm/dpm.h" + +/* + * Global functions for MCA overall DPM + */ + +BEGIN_C_DECLS + +struct ompi_dpm_base_disconnect_obj { + ompi_communicator_t *comm; + int size; + struct ompi_request_t **reqs; + int buf; +}; +typedef struct ompi_dpm_base_disconnect_obj ompi_dpm_base_disconnect_obj; + +/** + * Initialize the DPM MCA framework + * + * @retval OMPI_SUCCESS Upon success + * @retval OMPI_ERROR Upon failures + * + * This function is invoked during ompi_init(); + */ +OMPI_DECLSPEC int ompi_dpm_base_open(void); + +/** + * Select an available component. 
+ * + * @retval OMPI_SUCCESS Upon Success + * @retval OMPI_NOT_FOUND If no component can be selected + * @retval OMPI_ERROR Upon other failure + * + */ +OMPI_DECLSPEC int ompi_dpm_base_select(void); + +/** + * Finalize the DPM MCA framework + * + * @retval OMPI_SUCCESS Upon success + * @retval OMPI_ERROR Upon failures + * + * This function is invoked during ompi_finalize(); + */ +OMPI_DECLSPEC int ompi_dpm_base_close(void); + +/* Internal support functions */ +OMPI_DECLSPEC char* ompi_dpm_base_dyn_init (void); +OMPI_DECLSPEC int ompi_dpm_base_dyn_finalize (void); +OMPI_DECLSPEC void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm); +OMPI_DECLSPEC ompi_dpm_base_disconnect_obj *ompi_dpm_base_disconnect_init ( ompi_communicator_t *comm); +OMPI_DECLSPEC void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs); + + +/* useful globals */ +OMPI_DECLSPEC extern int ompi_dpm_base_output; +OMPI_DECLSPEC extern opal_list_t ompi_dpm_base_components_available; +OMPI_DECLSPEC extern ompi_dpm_base_component_t ompi_dpm_base_selected_component; +OMPI_DECLSPEC extern ompi_dpm_base_module_t ompi_dpm; + +END_C_DECLS + +#endif /* OMPI_MCA_DPM_BASE_H */ diff --git a/ompi/mca/dpm/base/dpm_base_close.c b/ompi/mca/dpm/base/dpm_base_close.c new file mode 100644 index 0000000000..291b02d408 --- /dev/null +++ b/ompi/mca/dpm/base/dpm_base_close.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "ompi/mca/dpm/dpm.h" +#include "ompi/mca/dpm/base/base.h" + +int ompi_dpm_base_close(void) +{ + /* Close the selected component */ + if( NULL != ompi_dpm.finalize ) { + ompi_dpm.finalize(); + } + + /* Close all available modules that are open */ + mca_base_components_close(ompi_dpm_base_output, + &ompi_dpm_base_components_available, + NULL); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/dpm/base/dpm_base_common_fns.c b/ompi/mca/dpm/base/dpm_base_common_fns.c new file mode 100644 index 0000000000..c98e48a262 --- /dev/null +++ b/ompi/mca/dpm/base/dpm_base_common_fns.c @@ -0,0 +1,279 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2007 University of Houston. All rights reserved. + * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include +#include + +#include "ompi/request/request.h" +#include "ompi/mca/dpm/dpm.h" +#include "ompi/mca/pml/pml.h" + +#include "ompi/mca/dpm/base/base.h" + + +/* + * Return the value of the OMPI_PARENT_PORT environment variable, or NULL + * when it is not set. The pointer refers to the environment itself and + * must not be freed by the caller. + */ +char* ompi_dpm_base_dyn_init (void) +{ + /* the variable name is a fixed string, so query it directly rather + than through a heap copy made by an unchecked asprintf() */ + return getenv("OMPI_PARENT_PORT"); +} + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* this routine runs through the list of communicators + and does the disconnect for all dynamic communicators */ +int ompi_dpm_base_dyn_finalize (void) +{ + int i,j=0, max=0; + ompi_dpm_base_disconnect_obj **objs=NULL; + ompi_communicator_t *comm=NULL; + + if ( 1 size = ompi_comm_remote_size (comm); + } else { + obj->size = ompi_comm_size (comm); + } + + obj->comm = comm; + obj->reqs = (ompi_request_t **) malloc(2*obj->size*sizeof(ompi_request_t *)); + if ( NULL == obj->reqs ) { + free (obj); + return NULL; + } + + /* initiate all isend_irecvs. We use a dummy buffer stored on + the object, since we are sending zero size messages anyway.
*/ + for ( i=0; i < obj->size; i++ ) { + ret = MCA_PML_CALL(irecv (&(obj->buf), 0, MPI_INT, i, + OMPI_COMM_BARRIER_TAG, comm, + &(obj->reqs[2*i]))); + + if ( OMPI_SUCCESS != ret ) { + free (obj->reqs); + free (obj); + return NULL; + } + + ret = MCA_PML_CALL(isend (&(obj->buf), 0, MPI_INT, i, + OMPI_COMM_BARRIER_TAG, + MCA_PML_BASE_SEND_SYNCHRONOUS, + comm, &(obj->reqs[2*i+1]))); + + if ( OMPI_SUCCESS != ret ) { + free (obj->reqs); + free (obj); + return NULL; + } + } + + /* return handle */ + return obj; +} +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* - count how many requests are active + * - generate a request array large enough to hold + all active requests + * - call waitall on the overall request array + * - free the objects + */ +void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs) +{ + + ompi_request_t **reqs=NULL; + char *treq=NULL; + int totalcount = 0; + int i; + int ret; + + for (i=0; isize; + } + + reqs = (ompi_request_t **) malloc (2*totalcount*sizeof(ompi_request_t *)); + if ( NULL == reqs ) { + printf("ompi_comm_disconnect_waitall: error allocating memory\n"); + return; + } + + /* generate a single, large array of pending requests */ + treq = (char *)reqs; + for (i=0; ireqs, 2*objs[i]->size * sizeof(ompi_request_t *)); + treq += 2*objs[i]->size * sizeof(ompi_request_t *); + } + + /* force all non-blocking all-to-alls to finish */ + ret = ompi_request_wait_all (2*totalcount, reqs, MPI_STATUSES_IGNORE); + + /* Finally, free everything */ + for (i=0; i< count; i++ ) { + if (NULL != objs[i]->reqs ) { + free (objs[i]->reqs ); + free (objs[i]); + } + } + + free (reqs); + + /* decrease the counter for dynamic communicators by 'count'. 
+ Attention, this approach now requires, that we are just using + these routines for communicators which have been flagged dynamic */ + ompi_comm_num_dyncomm -=count; + + return; +} + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +#define OMPI_DPM_BASE_MAXJOBIDS 64 +void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm) +{ + int i, j, numjobids=0; + int size, rsize; + int found; + orte_jobid_t jobids[OMPI_DPM_BASE_MAXJOBIDS], thisjobid; + ompi_group_t *grp=NULL; + ompi_proc_t *proc = NULL; + + /* special case for MPI_COMM_NULL */ + if ( comm == MPI_COMM_NULL ) { + return; + } + + size = ompi_comm_size (comm); + rsize = ompi_comm_remote_size(comm); + + /* loop over all processes in local group and count number + of different jobids. */ + grp = comm->c_local_group; + for (i=0; i< size; i++) { + proc = ompi_group_peer_lookup(grp,i); + thisjobid = proc->proc_name.jobid; + found = 0; + for ( j=0; jc_remote_group; + for (i=0; i< rsize; i++) { + proc = ompi_group_peer_lookup(grp,i); + thisjobid = proc->proc_name.jobid; + found = 0; + for ( j=0; j 1 ) { + ompi_comm_num_dyncomm++; + OMPI_COMM_SET_DYNAMIC(comm); + } + + return; +} diff --git a/ompi/mca/dpm/base/dpm_base_open.c b/ompi/mca/dpm/base/dpm_base_open.c new file mode 100644 index 0000000000..420667a1d8 --- /dev/null +++ b/ompi/mca/dpm/base/dpm_base_open.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "ompi/mca/dpm/dpm.h" +#include "ompi/mca/dpm/base/base.h" + +#include "ompi/mca/dpm/base/static-components.h" + +/* + * Globals + */ +OMPI_DECLSPEC int ompi_dpm_base_output = -1; +OMPI_DECLSPEC ompi_dpm_base_module_t ompi_dpm; +opal_list_t ompi_dpm_base_components_available; +ompi_dpm_base_component_t ompi_dpm_base_selected_component; + +/** + * Function for finding and opening either all MCA components, + * or the one that was specifically requested via a MCA parameter. + */ +int ompi_dpm_base_open(void) +{ + /* Debugging/Verbose output */ + ompi_dpm_base_output = opal_output_open(NULL); + + /* Open up all available components */ + if (OPAL_SUCCESS != + mca_base_components_open("dpm", + ompi_dpm_base_output, + mca_dpm_base_static_components, + &ompi_dpm_base_components_available, + true)) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/dpm/base/dpm_base_select.c b/ompi/mca/dpm/base/dpm_base_select.c new file mode 100644 index 0000000000..e4cb3b836c --- /dev/null +++ b/ompi/mca/dpm/base/dpm_base_select.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/base/mca_base_component_repository.h" + +#include "ompi/mca/dpm/dpm.h" +#include "ompi/mca/dpm/base/base.h" + + +int ompi_dpm_base_select(void) +{ + opal_list_item_t *item; + mca_base_component_list_item_t *cli; + ompi_dpm_base_component_t *component, *best_component = NULL; + ompi_dpm_base_module_t *module, *best_module = NULL; + int priority, best_priority = -1; + int rc; + + /* Query all the opened components and see if they want to run */ + + for (item = opal_list_get_first(&ompi_dpm_base_components_available); + opal_list_get_end(&ompi_dpm_base_components_available) != item; + item = opal_list_get_next(item)) { + cli = (mca_base_component_list_item_t *) item; + component = (ompi_dpm_base_component_t *) cli->cli_component; + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + "ompi:dpm:base:select: querying component %s", + component->dpm_version.mca_component_name)); + + /* Call the component's init function and see if it wants to be + selected */ + + module = component->dpm_init(&priority); + + /* If we got a non-NULL module back, then the component wants + to be considered for selection */ + + if (NULL != module) { + /* If this is the best one, save it */ + if (priority > best_priority) { + + /* If there was a previous best one, finalize */ + if (NULL != best_module) { + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + "ompi:dpm:base:select: found better component - finalizing component %s", + best_component->dpm_version.mca_component_name)); + + best_module->finalize(); + } + + /* Save the new best one */ + best_module = module; + best_component = component; + + /* update the best priority */ + best_priority = priority; + } else { + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + 
"ompi:dpm:base:select: component %s does did not win the election", + component->dpm_version.mca_component_name)); + + if (NULL == module->finalize) { + opal_output(ompi_dpm_base_output, + "It appears you are the victim of a stale library - please delete your installation lib directory and reinstall"); + } else { + module->finalize(); + } + } + } + } + + /* If we didn't find one to select, barf */ + + if (NULL == best_component) { + return OMPI_ERROR; + } + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + "ompi:dpm:base:select: component %s was selected", + best_component->dpm_version.mca_component_name)); + + /* We have happiness -- save the component and module for later + usage */ + + ompi_dpm = *best_module; + ompi_dpm_base_selected_component = *best_component; + + /* unload all components that were not selected */ + item = opal_list_get_first(&ompi_dpm_base_components_available); + while(item != opal_list_get_end(&ompi_dpm_base_components_available)) { + opal_list_item_t* next = opal_list_get_next(item); + ompi_dpm_base_component_t* component; + cli = (mca_base_component_list_item_t *) item; + component = (ompi_dpm_base_component_t *) cli->cli_component; + if(component != best_component) { + + OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output, + "ompi:dpm:base:select: module %s unloaded", + component->dpm_version.mca_component_name)); + + mca_base_component_repository_release((mca_base_component_t *) component); + opal_list_remove_item(&ompi_dpm_base_components_available, item); + OBJ_RELEASE(item); + } + item = next; + } + + /* init the selected module */ + if (NULL != ompi_dpm.init) { + if (OMPI_SUCCESS != (rc = ompi_dpm.init())) { + return rc; + } + } + return OMPI_SUCCESS; +} diff --git a/orte/mca/sds/lsf/configure.params b/ompi/mca/dpm/base/help-ompi-dpm-base.txt similarity index 72% rename from orte/mca/sds/lsf/configure.params rename to ompi/mca/dpm/base/help-ompi-dpm-base.txt index 8fc44480a6..3ce9616d36 100644 --- a/orte/mca/sds/lsf/configure.params +++ 
b/ompi/mca/dpm/base/help-ompi-dpm-base.txt @@ -1,22 +1,20 @@ -# -*- shell-script -*- + -*- text -*- # -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2007 Los Alamos National Security, LLC. All rights -# reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # - -PARAM_CONFIG_FILES="Makefile" +# This is the US/English general help file for OMPI DPM framework. +# diff --git a/ompi/mca/dpm/dpm.h b/ompi/mca/dpm/dpm.h new file mode 100644 index 0000000000..c0bd9e75b1 --- /dev/null +++ b/ompi/mca/dpm/dpm.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Dynamic Process Management Interface + * + */ + +#ifndef OMPI_MCA_DPM_H +#define OMPI_MCA_DPM_H + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/class/opal_object.h" + +#include "ompi/info/info.h" +#include "ompi/communicator/communicator.h" + +BEGIN_C_DECLS + +/* OMPI port definitions. Every derived tag is parenthesized so that it + * expands as a single operand when used inside a larger expression. */ +#define OMPI_RML_TAG_BASE ORTE_RML_TAG_MAX + +#define OMPI_RML_TAG_UDAPL (OMPI_RML_TAG_BASE+1) +#define OMPI_RML_TAG_OPENIB (OMPI_RML_TAG_BASE+2) +#define OMPI_RML_TAG_XOPENIB (OMPI_RML_TAG_BASE+3) +#define OMPI_RML_TAG_COMM_CID_INTRA (OMPI_RML_TAG_BASE+4) +#define OMPI_RML_TAG_XOOB (OMPI_RML_TAG_BASE+5) +#define OMPI_RML_TAG_SM_BACK_FILE_CREATED (OMPI_RML_TAG_BASE+6) +#define OMPI_RML_TAG_WIREUP (OMPI_RML_TAG_BASE+7) +#define OMPI_CRCP_COORD_BOOKMARK_TAG (OMPI_RML_TAG_BASE+8) +#define OMPI_COMM_JOIN_TAG (OMPI_RML_TAG_BASE+9) + +#define OMPI_RML_TAG_DYNAMIC (OMPI_RML_TAG_BASE+200) + + +/* + * Initialize a module + */ +typedef int (*ompi_dpm_base_module_init_fn_t)(void); + +/* + * Connect/accept communications + */ +typedef int (*ompi_dpm_base_module_connect_accept_fn_t)(ompi_communicator_t *comm, int root, + orte_process_name_t *port, bool send_first, + ompi_communicator_t **newcomm, orte_rml_tag_t tag); + +/** + * Executes internally a disconnect on all dynamic communicators + * in case the user did not disconnect them. + */ +typedef void (*ompi_dpm_base_module_disconnect_fn_t)(ompi_communicator_t *comm); + +/* + * Dynamically spawn processes + */ +typedef int (*ompi_dpm_base_module_spawn_fn_t)(int count, char **array_of_commands, + char ***array_of_argv, + int *array_of_maxprocs, + MPI_Info *array_of_info, + char *port_name); + +/* + * This routine checks, whether an application has been spawned + * by another MPI application, or has been independently started. + * If it has been spawned, it establishes the parent communicator.
+ * Since the routine has to communicate, it should be among the last + * steps in MPI_Init, to be sure that everything is already set up. + */ +typedef int (*ompi_dpm_base_module_dyn_init_fn_t)(void); + +/* + * Interface for mpi_finalize to call to ensure dynamically spawned procs + * collectively finalize + */ +typedef int (*ompi_dpm_base_module_dyn_finalize_fn_t)(void); + +/* this routine counts the number of different jobids of the processes + given in a certain communicator. If there is more than one jobid, + we mark the communicator as 'dynamic'. This is especially relevant + for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have + to wait for all still connected processes. +*/ +typedef void (*ompi_dpm_base_module_mark_dyncomm_fn_t)(ompi_communicator_t *comm); + +/* + * Open a port to interface to a dynamically spawned job + */ +typedef int (*ompi_dpm_base_module_open_port_fn_t)(char *port_name); + +/* + * Parse a port name to get the contact info and tag + */ +typedef char* (*ompi_dpm_base_module_parse_port_fn_t)(char *port_name, orte_rml_tag_t *tag); + +/* + * Close a port + */ +typedef int (*ompi_dpm_base_module_close_port_fn_t)(char *port_name); + +/* + * Finalize a module + */ +typedef int (*ompi_dpm_base_module_finalize_fn_t)(void); + +/** +* Structure for DPM v1.0.0 modules + */ +struct ompi_dpm_base_module_1_0_0_t { + /** Initialization Function */ + ompi_dpm_base_module_init_fn_t init; + /* connect/accept */ + ompi_dpm_base_module_connect_accept_fn_t connect_accept; + /* disconnect */ + ompi_dpm_base_module_disconnect_fn_t disconnect; + /* spawn processes */ + ompi_dpm_base_module_spawn_fn_t spawn; + /* dyn_init */ + ompi_dpm_base_module_dyn_init_fn_t dyn_init; + /* dyn_finalize */ + ompi_dpm_base_module_dyn_finalize_fn_t dyn_finalize; + /* mark dyncomm */ + ompi_dpm_base_module_mark_dyncomm_fn_t mark_dyncomm; + /* open port */ + ompi_dpm_base_module_open_port_fn_t open_port; + /* parse port */ + ompi_dpm_base_module_parse_port_fn_t 
parse_port; + /* close port */ + ompi_dpm_base_module_close_port_fn_t close_port; + /* finalize */ + ompi_dpm_base_module_finalize_fn_t finalize; +}; +typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_1_0_0_t; +typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_t; + +OMPI_DECLSPEC extern ompi_dpm_base_module_t ompi_dpm; + + +typedef struct ompi_dpm_base_module_1_0_0_t* +(*ompi_dpm_base_component_init_fn_t)(int *priority); + + +/** + * Structure for DPM v1.0.0 components. + */ +struct ompi_dpm_base_component_1_0_0_t { + /** MCA base component */ + mca_base_component_t dpm_version; + /** MCA base data */ + mca_base_component_data_1_0_0_t dpm_data; + /* component selection */ + ompi_dpm_base_component_init_fn_t dpm_init; +}; +typedef struct ompi_dpm_base_component_1_0_0_t ompi_dpm_base_component_1_0_0_t; +typedef struct ompi_dpm_base_component_1_0_0_t ompi_dpm_base_component_t; + +/** + * Macro for use in components that are of type DPM v1.0.0 + */ +#define OMPI_DPM_BASE_VERSION_1_0_0 \ + /* DPM v1.0 is chained to MCA v1.0 */ \ + MCA_BASE_VERSION_1_0_0, \ + /* DPM v1.0 */ \ + "dpm", 1, 0, 0 + + +END_C_DECLS + +#endif /* OMPI_MCA_DPM_H */ diff --git a/ompi/mca/dpm/orte/Makefile.am b/ompi/mca/dpm/orte/Makefile.am new file mode 100644 index 0000000000..57f9d04d2f --- /dev/null +++ b/ompi/mca/dpm/orte/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +dist_pkgdata_DATA = help-ompi-dpm-orte.txt + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if OMPI_BUILD_dpm_orte_DSO +component_noinst = +component_install = mca_dpm_orte.la +else +component_noinst = libmca_dpm_orte.la +component_install = +endif + +local_sources = \ + dpm_orte.c \ + dpm_orte.h \ + dpm_orte_component.c + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_dpm_orte_la_SOURCES = $(local_sources) +mca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_dpm_orte_la_SOURCES = $(local_sources) +libmca_dpm_orte_la_LIBADD = $(dpm_orte_LIBS) +libmca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS) + diff --git a/orte/mca/errmgr/bproc/configure.params b/ompi/mca/dpm/orte/configure.params similarity index 100% rename from orte/mca/errmgr/bproc/configure.params rename to ompi/mca/dpm/orte/configure.params diff --git a/ompi/mca/dpm/orte/dpm_orte.c b/ompi/mca/dpm/orte/dpm_orte.c new file mode 100644 index 0000000000..d3f8440556 --- /dev/null +++ b/ompi/mca/dpm/orte/dpm_orte.c @@ -0,0 +1,920 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include +#include +#include + +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/util/opal_getcwd.h" + +#include "opal/dss/dss.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/grpcomm/grpcomm.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/routed/routed.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_data_server.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/proc/proc.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/info/info.h" +#include "ompi/runtime/ompi_module_exchange.h" + +#include "ompi/mca/dpm/base/base.h" +#include "dpm_orte.h" + +/* Local static variables */ +static opal_mutex_t ompi_dpm_port_mutex; +static orte_rml_tag_t next_tag; + + +/* + * Init the module + */ +static int init(void) +{ + OBJ_CONSTRUCT(&ompi_dpm_port_mutex, opal_mutex_t); + next_tag = OMPI_RML_TAG_DYNAMIC; + + return OMPI_SUCCESS; +} + +static int get_rport (orte_process_name_t *port, + int send_first, struct ompi_proc_t *proc, + orte_rml_tag_t tag, orte_process_name_t *rport); + + +static int connect_accept ( ompi_communicator_t *comm, int root, + orte_process_name_t *port, bool send_first, + ompi_communicator_t **newcomm, orte_rml_tag_t tag ) +{ + int size, rsize, rank, rc; + orte_std_cntr_t num_vals; + orte_std_cntr_t rnamebuflen = 0; + int rnamebuflen_int = 0; + void *rnamebuf=NULL; + + ompi_communicator_t *newcomp=MPI_COMM_NULL; + ompi_proc_t **rprocs=NULL; + ompi_group_t *group=comm->c_local_group; + orte_process_name_t *rport=NULL, tmp_port_name; + opal_buffer_t *nbuf=NULL, *nrbuf=NULL; + ompi_proc_t **proc_list=NULL, **new_proc_list; + int i,j, new_proc_len; + ompi_group_t *new_group_pointer; + + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + 
"%s dpm:orte:connect_accept with port %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(port))); + + size = ompi_comm_size ( comm ); + rank = ompi_comm_rank ( comm ); + + /* tell the progress engine to tick the event library more + often, to make sure that the OOB messages get sent */ + opal_progress_event_users_increment(); + + if ( rank == root ) { + /* The process receiving first does not have yet the contact + information of the remote process. Therefore, we have to + exchange that. + */ + + if(!OMPI_GROUP_IS_DENSE(group)) { + proc_list = (ompi_proc_t **) calloc (group->grp_proc_count, + sizeof (ompi_proc_t *)); + for(i=0 ; igrp_proc_count ; i++) + proc_list[i] = ompi_group_peer_lookup(group,i); + } + + if ( OMPI_COMM_JOIN_TAG != tag ) { + if(OMPI_GROUP_IS_DENSE(group)){ + rc = get_rport(port,send_first, + group->grp_proc_pointers[rank], tag, + &tmp_port_name); + } + else { + rc = get_rport(port,send_first, + proc_list[rank], tag, + &tmp_port_name); + } + if (OMPI_SUCCESS != rc) { + return rc; + } + rport = &tmp_port_name; + } else { + rport = port; + } + + /* Generate the message buffer containing the number of processes and the list of + participating processes */ + nbuf = OBJ_NEW(opal_buffer_t); + if (NULL == nbuf) { + return OMPI_ERROR; + } + + if (ORTE_SUCCESS != (rc = opal_dss.pack(nbuf, &size, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + + if(OMPI_GROUP_IS_DENSE(group)) { + ompi_proc_pack(group->grp_proc_pointers, size, nbuf); + } + else { + ompi_proc_pack(proc_list, size, nbuf); + } + + nrbuf = OBJ_NEW(opal_buffer_t); + if (NULL == nrbuf ) { + rc = OMPI_ERROR; + goto exit; + } + + /* Exchange the number and the list of processes in the groups */ + if ( send_first ) { + rc = orte_rml.send_buffer(rport, nbuf, tag, 0); + rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); + } else { + rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); + rc = orte_rml.send_buffer(rport, nbuf, tag, 0); + } + + if (ORTE_SUCCESS != (rc = 
opal_dss.unload(nrbuf, &rnamebuf, &rnamebuflen))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + } + + /* First convert the size_t to an int so we can cast in the bcast to a void * + * if we don't then we will get badness when using big vs little endian + * THIS IS NO LONGER REQUIRED AS THE LENGTH IS NOW A STD_CNTR_T, WHICH + * CORRELATES TO AN INT32 + */ + rnamebuflen_int = (int)rnamebuflen; + + /* bcast the buffer-length to all processes in the local comm */ + rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm, + comm->c_coll.coll_bcast_module); + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + rnamebuflen = rnamebuflen_int; + + if ( rank != root ) { + /* non root processes need to allocate the buffer manually */ + rnamebuf = (char *) malloc(rnamebuflen); + if ( NULL == rnamebuf ) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + + /* bcast list of processes to all procs in local group + and reconstruct the data. Note that proc_get_proclist + adds processes, which were not known yet to our + process pool. 
+ */ + rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm, + comm->c_coll.coll_bcast_module); + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + + nrbuf = OBJ_NEW(opal_buffer_t); + if (NULL == nrbuf) { + goto exit; + } + if ( ORTE_SUCCESS != ( rc = opal_dss.load(nrbuf, rnamebuf, rnamebuflen))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + + num_vals = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(nrbuf, &rsize, &num_vals, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + + rc = ompi_proc_unpack(nrbuf, rsize, &rprocs, &new_proc_len, &new_proc_list); + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + + /* If we added new procs, we need to do the modex and then call + PML add_procs */ + if (new_proc_len > 0) { + opal_list_t all_procs; + orte_namelist_t *name; + + OBJ_CONSTRUCT(&all_procs, opal_list_t); + + if (send_first) { + for (i = 0 ; i < group->grp_proc_count ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = ompi_group_peer_lookup(group, i)->proc_name; + opal_list_append(&all_procs, &name->item); + } + + for (i = 0 ; i < rsize ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = rprocs[i]->proc_name; + opal_list_append(&all_procs, &name->item); + } + } else { + for (i = 0 ; i < rsize ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = rprocs[i]->proc_name; + opal_list_append(&all_procs, &name->item); + } + + for (i = 0 ; i < group->grp_proc_count ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = ompi_group_peer_lookup(group, i)->proc_name; + opal_list_append(&all_procs, &name->item); + } + } + + if (OMPI_SUCCESS != (rc = orte_grpcomm.modex(&all_procs))) { + ORTE_ERROR_LOG(rc); + goto exit; + } + + /* + while (NULL != (item = opal_list_remove_first(&all_procs))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&all_procs); + */ + + MCA_PML_CALL(add_procs(new_proc_list, new_proc_len)); + } + + OBJ_RELEASE(nrbuf); + if ( rank == root ) { + OBJ_RELEASE(nbuf); + } + + new_group_pointer=ompi_group_allocate(rsize); + if( 
NULL == new_group_pointer ) { + return MPI_ERR_GROUP; + } + + /* put group elements in the list */ + for (j = 0; j < rsize; j++) { + new_group_pointer->grp_proc_pointers[j] = rprocs[j]; + } /* end proc loop */ + + /* increment proc reference counters */ + ompi_group_increment_proc_count(new_group_pointer); + + /* set up communicator structure */ + rc = ompi_comm_set ( &newcomp, /* new comm */ + comm, /* old comm */ + group->grp_proc_count, /* local_size */ + NULL, /* local_procs */ + rsize, /* remote_size */ + NULL , /* remote_procs */ + NULL, /* attrs */ + comm->error_handler, /* error handler */ + NULL, /* topo component */ + group, /* local group */ + new_group_pointer /* remote group */ + ); + if ( NULL == newcomp ) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + ompi_group_decrement_proc_count (new_group_pointer); + OBJ_RELEASE(new_group_pointer); + new_group_pointer = MPI_GROUP_NULL; + + /* allocate comm_cid */ + rc = ompi_comm_nextcid ( newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + rport, /* remote leader */ + OMPI_COMM_CID_INTRA_OOB, /* mode */ + send_first ); /* send or recv first */ + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + + /* activate comm and init coll-component */ + rc = ompi_comm_activate ( newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + rport, /* remote leader */ + OMPI_COMM_CID_INTRA_OOB, /* mode */ + send_first, /* send or recv first */ + 0); /* sync_flag */ + if ( OMPI_SUCCESS != rc ) { + goto exit; + } + + /* Question: do we have to re-start some low level stuff + to enable the usage of fast communication devices + between the two worlds ? 
+ */ + + + exit: + /* done with OOB and such - slow our tick rate again */ + opal_progress(); + opal_progress_event_users_decrement(); + + if ( NULL != rprocs ) { + free ( rprocs ); + } + if ( NULL != proc_list ) { + free ( proc_list ); + } + if ( OMPI_SUCCESS != rc ) { + if ( MPI_COMM_NULL != newcomp && NULL != newcomp ) { + OBJ_RETAIN(newcomp); + newcomp = MPI_COMM_NULL; + } + } + + *newcomm = newcomp; + return rc; +} + +static void disconnect(ompi_communicator_t *comm) +{ + ompi_dpm_base_disconnect_obj *dobj; + + dobj = ompi_dpm_base_disconnect_init (comm); + ompi_dpm_base_disconnect_waitall(1, &dobj); + +} + + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* + * This routine is necessary, since in the connect/accept case, the processes + * executing the connect operation have the OOB contact information of the + * leader of the remote group, however, the processes executing the + * accept get their own port_name = OOB contact information passed in as + * an argument. This is however useless. + * + * Therefore, the two root processes exchange this information at this + * point. 
+ * + */ +int get_rport(orte_process_name_t *port, int send_first, + ompi_proc_t *proc, orte_rml_tag_t tag, + orte_process_name_t *rport_name) +{ + int rc; + orte_std_cntr_t num_vals; + + if ( send_first ) { + opal_buffer_t *sbuf; + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:get_rport sending to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(port))); + + sbuf = OBJ_NEW(opal_buffer_t); + if (NULL == sbuf) { + return OMPI_ERROR; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(sbuf, &(proc->proc_name), 1, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(sbuf); + return rc; + } + + rc = orte_rml.send_buffer(port, sbuf, tag, 0); + OBJ_RELEASE(sbuf); + if ( 0 > rc ) { + ORTE_ERROR_LOG(rc); + return rc; + } + + *rport_name = *port; + } else { + opal_buffer_t *rbuf; + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:get_rport waiting to recv", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + rbuf = OBJ_NEW(opal_buffer_t); + if (NULL == rbuf) { + return ORTE_ERROR; + } + if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, rbuf, tag, 0))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(rbuf); + return rc; + } + + num_vals = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(rbuf, rport_name, &num_vals, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(rbuf); + return rc; + } + OBJ_RELEASE(rbuf); + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:get_rport recv'd name %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(rport_name))); + } + + return OMPI_SUCCESS; +} + + +static int spawn(int count, char **array_of_commands, + char ***array_of_argv, + int *array_of_maxprocs, + MPI_Info *array_of_info, + char *port_name) +{ + int rc, i, j, counter; + int have_wdir=0; + bool have_prefix; + int valuelen=OMPI_PATH_MAX, flag=0; + char cwd[OMPI_PATH_MAX]; + char host[OMPI_PATH_MAX]; /*** should define OMPI_HOST_MAX ***/ + char prefix[OMPI_PATH_MAX]; + char *base_prefix=NULL; + + orte_job_t *jdata; + orte_std_cntr_t 
dummy; + orte_app_context_t *app; + + bool timing = false; + struct timeval ompistart, ompistop; + + /* parse the info object */ + /* check potentially for: + - "host": desired host where to spawn the processes + - "hostfile": hostfile containing hosts where procs are + to be spawned + - "add-host": add the specified hosts to the known list + of available resources and spawn these + procs on them + - "add-hostfile": add the hosts in the hostfile to the + known list of available resources and spawn + these procs on them + - "prefix": the path to the root of the directory tree where ompi + executables and libraries can be found on all nodes + used to spawn these procs + - "arch": desired architecture + - "wdir": directory, where executable can be found + - "path": list of directories where to look for the executable + - "file": filename, where additional information is provided. + - "soft": see page 92 of MPI-2. + */ + + /* make sure the progress engine properly trips the event library */ + opal_progress_event_users_increment(); + + /* setup the job object */ + jdata = OBJ_NEW(orte_job_t); + + /* Convert the list of commands to an array of orte_app_context_t + pointers */ + for (i = 0; i < count; ++i) { + app = OBJ_NEW(orte_app_context_t); + if (NULL == app) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + /* add the app to the job data */ + orte_pointer_array_add(&dummy, jdata->apps, app); + jdata->num_apps++; + + /* copy over the name of the executable */ + app->app = strdup(array_of_commands[i]); + if (NULL == app->app) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + /* record the number of procs to be generated */ + app->num_procs = array_of_maxprocs[i]; + + /* copy over the argv array */ + counter = 1; + + if (MPI_ARGVS_NULL != array_of_argv && + MPI_ARGV_NULL != 
array_of_argv[i]) { + /* first need to find out how many entries there are */ + j=0; + while (NULL != array_of_argv[i][j]) { + j++; + } + counter += j; + } + + /* now copy them over, ensuring to NULL terminate the array */ + app->argv = (char**)malloc((1 + counter) * sizeof(char*)); + if (NULL == app->argv) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + app->argv[0] = strdup(array_of_commands[i]); + for (j=1; j < counter; j++) { + app->argv[j] = strdup(array_of_argv[i][j-1]); + } + app->argv[counter] = NULL; + + + /* the environment gets set by the launcher + * all we need to do is add the specific values + * needed for comm_spawn + */ + /* Add environment variable with the contact information for the + child processes. + */ + counter = 1; + app->env = (char**)malloc((1+counter) * sizeof(char*)); + if (NULL == app->env) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + asprintf(&(app->env[0]), "OMPI_PARENT_PORT=%s", port_name); + app->env[1] = NULL; + for (j = 0; NULL != environ[j]; ++j) { + if (0 == strncmp("OMPI_", environ[j], 5)) { + opal_argv_append_nosize(&app->env, environ[j]); + } + } + + /* Check for well-known info keys */ + have_wdir = 0; + have_prefix = false; + if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { + + /* check for 'host' */ + ompi_info_get (array_of_info[i], "host", sizeof(host), host, &flag); + if ( flag ) { + app->num_map = 1; + app->map_data = (orte_app_context_map_t **) malloc(sizeof(orte_app_context_map_t *)); + app->map_data[0] = OBJ_NEW(orte_app_context_map_t); + app->map_data[0]->map_type = ORTE_APP_CONTEXT_MAP_HOSTNAME; + app->map_data[0]->map_data = strdup(host); + } + + /* check for 'hostfile' */ + ompi_info_get (array_of_info[i], "hostfile", sizeof(host), host, &flag); + if ( flag ) { + app->hostfile = 
strdup(host); + } + + /* check for 'add-host' */ + ompi_info_get (array_of_info[i], "add-host", sizeof(host), host, &flag); + if ( flag ) { + app->num_map = 1; + app->map_data = (orte_app_context_map_t **) malloc(sizeof(orte_app_context_map_t *)); + app->map_data[0] = OBJ_NEW(orte_app_context_map_t); + app->map_data[0]->map_type = ORTE_APP_CONTEXT_MAP_ADD_HOSTNAME; + app->map_data[0]->map_data = strdup(host); + } + + /* check for 'add-hostfile' */ + ompi_info_get (array_of_info[i], "add-hostfile", sizeof(host), host, &flag); + if ( flag ) { + app->add_hostfile = strdup(host); + } + + /* 'path', 'arch', 'file', 'soft' -- to be implemented */ + + /* check for 'ompi_prefix' (OMPI-specific -- to effect the same + * behavior as --prefix option to orterun) + */ + ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix), prefix, &flag); + if ( flag ) { + app->prefix_dir = strdup(prefix); + have_prefix = true; + } + + /* check for 'wdir' */ + ompi_info_get (array_of_info[i], "wdir", valuelen, cwd, &flag); + if ( flag ) { + app->cwd = strdup(cwd); + have_wdir = 1; + } + + /* check for 'ompi_local_slave' - OMPI-specific -- indicates that + * the specified app is to be launched by the local orted as a + * "slave" process, typically to support an attached co-processor + */ + ompi_info_get_bool(array_of_info[i], "ompi_local_slave", &jdata->local_spawn, &flag); + + } + + /* default value: If the user did not tell us where to look for the + executable, we assume the current working directory */ + if ( !have_wdir ) { + if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OMPI_PATH_MAX))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(jdata); + opal_progress_event_users_decrement(); + return rc; + } + app->cwd = strdup(cwd); + } + + /* if the user told us a new prefix, then we leave it alone. 
otherwise, if + * a prefix had been provided before, copy that one into the new app_context + * for use by the spawned children + */ + if ( !have_prefix && NULL != base_prefix) { + app->prefix_dir = strdup(base_prefix); + } + + /* leave the map info alone - the launcher will + * decide where to put things + */ + } /* for (i = 0 ; i < count ; ++i) */ + + /* cleanup */ + if (NULL != base_prefix) { + free(base_prefix); + } + + /* check for timing request - get stop time and report elapsed time if so */ + if (timing) { + if (0 != gettimeofday(&ompistop, NULL)) { + opal_output(0, "ompi_comm_start_procs: could not obtain stop time"); + } else { + opal_output(0, "ompi_comm_start_procs: time from start to prepare to spawn %ld usec", + (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + + (ompistop.tv_usec - ompistart.tv_usec))); + if (0 != gettimeofday(&ompistart, NULL)) { + opal_output(0, "ompi_comm_start_procs: could not obtain new start time"); + ompistart.tv_sec = ompistop.tv_sec; + ompistart.tv_usec = ompistop.tv_usec; + } + } + } + + /* spawn procs */ + rc = orte_plm.spawn(jdata); + OBJ_RELEASE(jdata); + + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + opal_progress_event_users_decrement(); + return MPI_ERR_SPAWN; + } + + /* check for timing request - get stop time and report elapsed time if so */ + if (timing) { + if (0 != gettimeofday(&ompistop, NULL)) { + opal_output(0, "ompi_comm_start_procs: could not obtain stop time"); + } else { + opal_output(0, "ompi_comm_start_procs: time to spawn %ld usec", + (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + + (ompistop.tv_usec - ompistart.tv_usec))); + } + } + + /* clean up */ + opal_progress_event_users_decrement(); + + return OMPI_SUCCESS; +} + +static int open_port(char *port_name) +{ + char *rml_uri, *ptr, tag[12]; + int rc; + + /* + * The port_name is equal to the OOB-contact information + * and an RML tag. The reason for adding the tag is + * to make the port unique for multi-threaded scenarios. 
+ */ + + if (NULL == (rml_uri = orte_rml.get_contact_info())) { + return OMPI_ERR_NOT_AVAILABLE; + } + + sprintf(tag, "%d", (int)next_tag); + + /* if the overall port name is too long, we try to truncate the rml uri */ + rc = 0; + while ((strlen(rml_uri)+strlen(tag)) > (MPI_MAX_PORT_NAME-2)) { + /* if we have already tried several times, punt! */ + if (4 < rc) { + free(rml_uri); + return OMPI_ERROR; + } + /* find the trailing uri and truncate there */ + ptr = strrchr(rml_uri, ';'); + *ptr = '\0'; + ++rc; + } + + OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); + sprintf (port_name, "%s:%s", rml_uri, tag); + next_tag++; + OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); + + free ( rml_uri ); + + return OMPI_SUCCESS; +} + +/* takes a port_name and separates it into the RML URI +* and the tag +*/ +static char *parse_port (char *port_name, orte_rml_tag_t *tag) +{ + char *tmp_string, *ptr; + + /* find the ':' demarking the RML tag we added to the end */ + if (NULL == (ptr = strrchr(port_name, ':'))) { + return NULL; + } + + /* terminate the port_name at that location */ + *ptr = '\0'; + ptr++; + + /* convert the RML tag */ + sscanf(ptr,"%d", (int*)tag); + + /* see if the length of the RML uri is too long - if so, + * truncate it + */ + if (strlen(port_name) > MPI_MAX_PORT_NAME) { + port_name[MPI_MAX_PORT_NAME] = '\0'; + } + + /* copy the RML uri so we can return a malloc'd value + * that can later be free'd + */ + tmp_string = strdup(port_name); + + return tmp_string; +} + +static int close_port(char *port_name) +{ + return OMPI_SUCCESS; +} + +static int dyn_init(void) +{ + char *oob_port=NULL; + char *port_name=NULL; + int root=0, rc; + bool send_first = true; + orte_rml_tag_t tag; + ompi_communicator_t *newcomm=NULL; + orte_process_name_t port_proc_name; + ompi_group_t *group = NULL; + ompi_errhandler_t *errhandler = NULL; + + ompi_communicator_t *oldcomm; + + /* if env-variable is set, we are a dynamically spawned + * child - parse port and call comm_connect_accept */ + if (NULL == 
(port_name = ompi_dpm_base_dyn_init())) { + /* nothing to do */ + return OMPI_SUCCESS; + } + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:dyn_init with port %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + port_name)); + + /* split the content of the environment variable into + its pieces, which are RML-uri:tag */ + oob_port = parse_port (port_name, &tag); + + /* set the contact info into the local hash table */ + if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(oob_port))) { + ORTE_ERROR_LOG(rc); + free(oob_port); + return(rc); + } + + /* process the RML uri to get the port's process name */ + if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(oob_port, &port_proc_name, NULL))) { + ORTE_ERROR_LOG(rc); + free(oob_port); + return rc; + } + free(oob_port); /* done with this */ + + /* update the route to this process - in this case, we always give it + * as direct since we were given the contact info. We trust the + * selected routed component to do the Right Thing for its own mode + * of operation + */ + if (ORTE_SUCCESS != (rc = orte_routed.update_route(&port_proc_name, &port_proc_name))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, + "%s dpm:orte:dyn_init calling connect_accept to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&port_proc_name))); + + rc = connect_accept (MPI_COMM_WORLD, root, &port_proc_name, + send_first, &newcomm, tag ); + if (OMPI_SUCCESS != rc) { + return rc; + } + + /* Set the parent communicator */ + ompi_mpi_comm_parent = newcomm; + + /* originally, we set comm_parent to comm_null (in comm_init), + * now we have to decrease the reference counters to the according + * objects + */ + + oldcomm = &ompi_mpi_comm_null; + OBJ_RELEASE(oldcomm); + group = &ompi_mpi_group_null; + OBJ_RELEASE(group); + errhandler = &ompi_mpi_errors_are_fatal; + OBJ_RELEASE(errhandler); + + /* Set name for debugging purposes */ + snprintf(newcomm->c_name, MPI_MAX_OBJECT_NAME, 
"MPI_COMM_PARENT"); + + return OMPI_SUCCESS; +} + + +/* + * finalize the module + */ +static int finalize(void) +{ + OBJ_DESTRUCT(&ompi_dpm_port_mutex); + return OMPI_SUCCESS; +} + +/* + * instantiate the module + */ +ompi_dpm_base_module_t ompi_dpm_orte_module = { + init, + connect_accept, + disconnect, + spawn, + dyn_init, + ompi_dpm_base_dyn_finalize, + ompi_dpm_base_mark_dyncomm, + open_port, + parse_port, + close_port, + finalize +}; + + diff --git a/ompi/mca/dpm/orte/dpm_orte.h b/ompi/mca/dpm/orte/dpm_orte.h new file mode 100644 index 0000000000..71c70a3555 --- /dev/null +++ b/ompi/mca/dpm/orte/dpm_orte.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_DPM_ORTE_H +#define OMPI_DPM_ORTE_H + +#include "ompi_config.h" + +#include "orte/types.h" + +#include "ompi/mca/dpm/dpm.h" + +BEGIN_C_DECLS + +/* access to module */ +extern ompi_dpm_base_module_t ompi_dpm_orte_module; + +OMPI_MODULE_DECLSPEC extern ompi_dpm_base_component_t mca_dpm_orte_component; + +END_C_DECLS + +#endif /* OMPI_DPM_ORTE_H */ diff --git a/ompi/mca/dpm/orte/dpm_orte_component.c b/ompi/mca/dpm/orte/dpm_orte_component.c new file mode 100644 index 0000000000..50ada4d1a9 --- /dev/null +++ b/ompi/mca/dpm/orte/dpm_orte_component.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include "dpm_orte.h" + +static int dpm_orte_component_open(void); +static int dpm_orte_component_close(void); +static ompi_dpm_base_module_t* dpm_orte_component_init( int* priority ); + +ompi_dpm_base_component_t mca_dpm_orte_component = { + /* First, the mca_base_component_t struct containing meta + information about the component itself */ + + { + /* Indicate that we are a dpm v1.0.0 component (which also implies + a specific MCA version) */ + + OMPI_DPM_BASE_VERSION_1_0_0, + + "orte", /* MCA component name */ + OMPI_MAJOR_VERSION, /* MCA component major version */ + OMPI_MINOR_VERSION, /* MCA component minor version */ + OMPI_RELEASE_VERSION, /* MCA component release version */ + dpm_orte_component_open, /* component open */ + dpm_orte_component_close /* component close */ + }, + + /* Next the MCA v1.0.0 component meta data */ + + { + /* This component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + dpm_orte_component_init, /* component init */ +}; + + +int dpm_orte_component_open(void) +{ + return OMPI_SUCCESS; +} + +int dpm_orte_component_close(void) +{ + return OMPI_SUCCESS; +} + +ompi_dpm_base_module_t* dpm_orte_component_init(int* priority) +{ + *priority = 50; + + return &ompi_dpm_orte_module; +} diff --git a/ompi/mca/dpm/orte/help-ompi-dpm-orte.txt b/ompi/mca/dpm/orte/help-ompi-dpm-orte.txt new file mode 100644 index 0000000000..68bd910377 --- /dev/null +++ b/ompi/mca/dpm/orte/help-ompi-dpm-orte.txt @@ -0,0 +1,43 @@ +# -*- text -*- +# +# 
Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open MPI. +# +[dpm-orte:no-server] +Process rank %ld attempted to %s a global ompi_server that +could not be contacted. This is typically caused by either not +specifying the contact info for the server, or by the server not +currently executing. If you did specify the contact info for a +server, please check to see that the server is running and start +it again (or have your sys admin start it) if it isn't. + +[dpm-orte:unknown-order] +Process rank %ld attempted to lookup a value but provided an +unrecognized order parameter. Order parameters are used to tell Open +MPI if it should first look for the requested value locally (i.e., from +the current job) or from a global ompi_server. Accepted order +parameters are "local" and "global", respectively. + +[dpm-orte:too-many-orders] +Process rank %ld attempted to lookup a value but provided too many +order parameters (%ld found). Order parameters are used to tell +Open MPI if it should first look for the requested value locally +(i.e., from the current job) or from a global ompi_server. Accepted +order parameters are "local" and "global", respectively, and each can +only be specified once. 
diff --git a/orte/mca/ns/replica/ns_replica.c b/ompi/mca/io/romio/romio/confdb/.hgfoo similarity index 100% rename from orte/mca/ns/replica/ns_replica.c rename to ompi/mca/io/romio/romio/confdb/.hgfoo diff --git a/ompi/mca/mpool/base/mpool_base_mem_cb.c b/ompi/mca/mpool/base/mpool_base_mem_cb.c index eae53a291b..69cb42bb4d 100644 --- a/ompi/mca/mpool/base/mpool_base_mem_cb.c +++ b/ompi/mca/mpool/base/mpool_base_mem_cb.c @@ -23,8 +23,8 @@ #include "opal/util/output.h" #include "mpool_base_mem_cb.h" #include "base.h" +#include "orte/types.h" #include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" opal_pointer_array_t mca_mpool_base_mem_cb_array; diff --git a/ompi/mca/mpool/base/mpool_base_tree.c b/ompi/mca/mpool/base/mpool_base_tree.c index 984ff76f49..26f3ee31ac 100644 --- a/ompi/mca/mpool/base/mpool_base_tree.c +++ b/ompi/mca/mpool/base/mpool_base_tree.c @@ -26,9 +26,10 @@ #include "opal/mca/mca.h" #include "opal/util/show_help.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/util/name_fns.h" #include "orte/util/proc_info.h" #include "orte/util/sys_info.h" +#include "orte/runtime/orte_globals.h" #include "ompi/runtime/params.h" #include "mpool_base_tree.h" @@ -172,13 +173,13 @@ void mca_mpool_base_tree_print(void) if (num_leaks <= ompi_debug_show_mpi_alloc_mem_leaks || ompi_debug_show_mpi_alloc_mem_leaks < 0) { opal_show_help("help-mpool-base.txt", "all mem leaks", - true, ORTE_NAME_PRINT(orte_process_info.my_name), + true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_system_info.nodename, orte_process_info.pid, leak_msg); } else { int i = num_leaks - ompi_debug_show_mpi_alloc_mem_leaks; opal_show_help("help-mpool-base.txt", "some mem leaks", - true, ORTE_NAME_PRINT(orte_process_info.my_name), + true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_system_info.nodename, orte_process_info.pid, leak_msg, i, (i > 1) ? 
"s were" : " was", diff --git a/ompi/mca/mpool/rdma/mpool_rdma_module.c b/ompi/mca/mpool/rdma/mpool_rdma_module.c index c2d2fe8547..208d8d00e4 100644 --- a/ompi/mca/mpool/rdma/mpool_rdma_module.c +++ b/ompi/mca/mpool/rdma/mpool_rdma_module.c @@ -25,6 +25,8 @@ #include "ompi_config.h" #include "opal/include/opal/align.h" #include "orte/util/proc_info.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" #include "opal/util/output.h" #include "ompi/mca/mpool/rdma/mpool_rdma.h" #include @@ -431,7 +433,7 @@ void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool) if(true == mca_mpool_rdma_component.print_stats) { opal_output(0, "%s rdma: stats " "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mpool_rdma->stat_cache_hit, mpool_rdma->stat_cache_miss, mpool_rdma->stat_cache_found, mpool_rdma->stat_cache_notfound, mpool_rdma->stat_evicted); diff --git a/ompi/mca/mtl/mx/mtl_mx_endpoint.c b/ompi/mca/mtl/mx/mtl_mx_endpoint.c index a4aad8ed6d..dacf18217c 100644 --- a/ompi/mca/mtl/mx/mtl_mx_endpoint.c +++ b/ompi/mca/mtl/mx/mtl_mx_endpoint.c @@ -21,10 +21,6 @@ #include #include #include "ompi/types.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/errmgr/errmgr.h" #include "opal/util/output.h" #include "mtl_mx.h" #include "mtl_mx_types.h" diff --git a/ompi/mca/pml/base/pml_base_select.c b/ompi/mca/pml/base/pml_base_select.c index 9d994e1014..c8b9c7bf85 100644 --- a/ompi/mca/pml/base/pml_base_select.c +++ b/ompi/mca/pml/base/pml_base_select.c @@ -25,7 +25,11 @@ #include "opal/runtime/opal_progress.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" + #include "orte/mca/errmgr/errmgr.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" @@ -165,10 +169,10 @@ 
int mca_pml_base_select(bool enable_progress_threads, if( NULL == tmp_val) { continue; } - orte_errmgr.error_detected(1, "PML %s cannot be selected", tmp_val, NULL); + orte_errmgr.abort(1, "PML %s cannot be selected", tmp_val); } if(0 == i) { - orte_errmgr.error_detected(2, "No pml component available. This shouldn't happen.", NULL); + orte_errmgr.abort(2, "No pml component available. This shouldn't happen."); } } diff --git a/ompi/mca/pml/crcpw/pml_crcpw_module.c b/ompi/mca/pml/crcpw/pml_crcpw_module.c index 861ccdc6ee..a0a75f81ed 100644 --- a/ompi/mca/pml/crcpw/pml_crcpw_module.c +++ b/ompi/mca/pml/crcpw/pml_crcpw_module.c @@ -27,7 +27,6 @@ #include "ompi/mca/btl/base/base.h" #include "ompi/mca/pml/crcpw/pml_crcpw.h" #include "ompi/mca/bml/base/base.h" -#include "orte/mca/ns/ns.h" #include "orte/mca/errmgr/errmgr.h" #include "ompi/class/ompi_free_list.h" diff --git a/ompi/mca/pml/dr/pml_dr.c b/ompi/mca/pml/dr/pml_dr.c index e311447889..2b48891301 100644 --- a/ompi/mca/pml/dr/pml_dr.c +++ b/ompi/mca/pml/dr/pml_dr.c @@ -34,7 +34,8 @@ #include "pml_dr_sendreq.h" #include "pml_dr_recvreq.h" #include "ompi/mca/bml/base/base.h" -#include "orte/mca/ns/ns.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "ompi/mca/pml/base/base.h" @@ -241,9 +242,9 @@ int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs) /* this won't work for comm spawn and other dynamic processes, but will work for initial job start */ idx = opal_pointer_array_add(&mca_pml_dr.endpoints, (void*) endpoint); - if(orte_ns.compare_fields(ORTE_NS_CMP_ALL, - orte_process_info.my_name, - &(endpoint->proc_ompi->proc_name)) == ORTE_EQUAL) { + if(orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + ORTE_PROC_MY_NAME, + &(endpoint->proc_ompi->proc_name)) == OPAL_EQUAL) { mca_pml_dr.my_rank = idx; } endpoint->local = endpoint->dst = idx; diff --git a/ompi/mca/pml/dr/pml_dr_endpoint.c b/ompi/mca/pml/dr/pml_dr_endpoint.c index 
ae954468fc..651811cea2 100644 --- a/ompi/mca/pml/dr/pml_dr_endpoint.c +++ b/ompi/mca/pml/dr/pml_dr_endpoint.c @@ -19,7 +19,6 @@ #include "ompi_config.h" #include "pml_dr.h" #include "pml_dr_endpoint.h" -#include "orte/mca/ns/ns.h" diff --git a/ompi/mca/pml/dr/pml_dr_recvfrag.c b/ompi/mca/pml/dr/pml_dr_recvfrag.c index 9f79baa62c..b59a728dcf 100644 --- a/ompi/mca/pml/dr/pml_dr_recvfrag.c +++ b/ompi/mca/pml/dr/pml_dr_recvfrag.c @@ -208,7 +208,7 @@ void mca_pml_dr_recv_frag_callback( return; } else { OPAL_OUTPUT((0, "%s:%d: the world as we know it is bad\n", __FILE__, __LINE__)); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } } comm = (mca_pml_dr_comm_t*)ompi_comm->c_pml_comm; @@ -241,7 +241,7 @@ void mca_pml_dr_recv_frag_callback( ompi_comm = ompi_comm_lookup(hdr->hdr_common.hdr_ctx); if(NULL == ompi_comm) { OPAL_OUTPUT((0, "%s:%d: the world as we know it is bad\n", __FILE__, __LINE__)); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } comm = (mca_pml_dr_comm_t*)ompi_comm->c_pml_comm; assert(hdr->hdr_common.hdr_src < opal_pointer_array_get_size(&comm->sparse_procs)); @@ -290,7 +290,7 @@ void mca_pml_dr_recv_frag_callback( ompi_comm = ompi_comm_lookup(hdr->hdr_common.hdr_ctx); if(NULL == ompi_comm) { MCA_PML_DR_DEBUG(0,(0, "%s:%d: the world as we know it is bad\n", __FILE__, __LINE__)); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } comm = (mca_pml_dr_comm_t*)ompi_comm->c_pml_comm; assert(hdr->hdr_common.hdr_src < opal_pointer_array_get_size(&comm->sparse_procs)); diff --git a/ompi/mca/pml/dr/pml_dr_sendreq.c b/ompi/mca/pml/dr/pml_dr_sendreq.c index 1167fcd591..8ffe014b08 100644 --- a/ompi/mca/pml/dr/pml_dr_sendreq.c +++ b/ompi/mca/pml/dr/pml_dr_sendreq.c @@ -121,7 +121,7 @@ static void mca_pml_dr_error_completion( mca_bml.bml_del_btl(btl); break; default: - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); break; } diff --git a/ompi/mca/pml/dr/pml_dr_vfrag.c b/ompi/mca/pml/dr/pml_dr_vfrag.c index 812c43aba7..3620716bf0 100644 --- 
a/ompi/mca/pml/dr/pml_dr_vfrag.c +++ b/ompi/mca/pml/dr/pml_dr_vfrag.c @@ -168,7 +168,7 @@ void mca_pml_dr_vfrag_reset(mca_pml_dr_vfrag_t* vfrag) if(mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->bml_endpoint->btl_eager) == 0 || mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->bml_endpoint->btl_eager) == 0) { opal_output(0, "%s:%d:%s: no path to peer", __FILE__, __LINE__, __func__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } if(vfrag->vf_offset == 0) { vfrag->bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->bml_endpoint->btl_eager); diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index 9bb72f8af1..ca499920d9 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -41,9 +41,7 @@ #include "ompi/runtime/ompi_cr.h" #include "ompi/runtime/ompi_module_exchange.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/gpr/gpr.h" mca_pml_ob1_t mca_pml_ob1 = { { @@ -450,7 +448,7 @@ void mca_pml_ob1_process_pending_rdma(void) void mca_pml_ob1_error_handler( struct mca_btl_base_module_t* btl, int32_t flags) { - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } int mca_pml_ob1_ft_event( int state ) @@ -458,7 +456,6 @@ int mca_pml_ob1_ft_event( int state ) ompi_proc_t** procs = NULL; size_t num_procs; int ret, p; - orte_buffer_t mdx_buf, rbuf; if(OPAL_CRS_CHECKPOINT == state) { ; @@ -469,10 +466,6 @@ int mca_pml_ob1_ft_event( int state ) else if(OPAL_CRS_RESTART == state) { /* * Get a list of processes - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. For our - * needs here we only need to fix up the modex, bml and pml - * references. 
*/ procs = ompi_proc_all(&num_procs); if(NULL == procs) { @@ -484,43 +477,26 @@ int mca_pml_ob1_ft_event( int state ) */ opal_output_verbose(10, ompi_cr_output, "pml:ob1: ft_event(Restart): Restart Modex information"); - if (OMPI_SUCCESS != (ret = ompi_modex_finalize())) { + if (OMPI_SUCCESS != (ret = orte_grpcomm.purge_proc_attrs())) { opal_output(0, - "pml:ob1: ft_event(Restart): modex_finalize Failed %d", + "pml:ob1: ft_event(Restart): purge_modex Failed %d", ret); return ret; } /* - * Make sure the modex is NULL so it can be re-initalized + * Refresh the proc structure, and publish our proc info in the modex. + * NOTE: Do *not* call ompi_proc_finalize as there are many places in + * the code that point to indv. procs in this strucutre. For our + * needs here we only need to fix up the modex, bml and pml + * references. */ - for(p = 0; p < (int)num_procs; ++p) { - if( NULL != procs[p]->proc_modex ) { - OBJ_RELEASE(procs[p]->proc_modex); - procs[p]->proc_modex = NULL; - } - } - - /* - * Init the modex structures - */ - if (OMPI_SUCCESS != (ret = ompi_modex_init())) { + if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { opal_output(0, - "pml:ob1: ft_event(Restart): modex_init Failed %d", + "pml:ob1: ft_event(Restart): proc_refresh Failed %d", ret); return ret; } - - /* - * Load back up the hostname/arch information into the modex - */ - if (OMPI_SUCCESS != (ret = ompi_proc_publish_info())) { - opal_output(0, - "pml:ob1: ft_event(Restart): proc_init Failed %d", - ret); - return ret; - } - } else if(OPAL_CRS_TERM == state ) { ; @@ -547,39 +523,16 @@ int mca_pml_ob1_ft_event( int state ) } else if(OPAL_CRS_RESTART == state) { /* - * Exchange the modex information once again + * Exchange the modex information once again. + * BTLs will have republished their modex information. 
*/ - OBJ_CONSTRUCT(&mdx_buf, orte_buffer_t); - if (OMPI_SUCCESS != (ret = ompi_modex_get_my_buffer(&mdx_buf))) { + if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(NULL))) { opal_output(0, - "pml:ob1: ft_event(Restart): Failed ompi_modex_get_my_buffer() = %d", + "pml:ob1: ft_event(Restart): Failed orte_grpcomm.modex() = %d", ret); return ret; } - /* - * Do the allgather exchange of information - */ - OBJ_CONSTRUCT(&rbuf, orte_buffer_t); - if (OMPI_SUCCESS != (ret = orte_grpcomm.allgather(&mdx_buf, &rbuf))) { - opal_output(0, - "pml:ob1: ft_event(Restart): Failed orte_grpcomm.allgather() = %d", - ret); - return ret; - } - OBJ_DESTRUCT(&mdx_buf); - - /* - * Process the modex data into the proc structures - */ - if (OMPI_SUCCESS != (ret = ompi_modex_process_data(&rbuf))) { - opal_output(0, - "pml:ob1: ft_event(Restart): Failed ompi_modex_process_data() = %d", - ret); - return ret; - } - OBJ_DESTRUCT(&rbuf); - /* * Fill in remote proc information */ @@ -592,7 +545,7 @@ int mca_pml_ob1_ft_event( int state ) /* * Startup the PML stack now that the modex is running again - * Add the new procs + * Add the new procs (BTLs redo modex recv's) */ if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) { opal_output(0, "pml:ob1: fr_event(Restart): Failed in add_procs (%d)", ret); diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.c b/ompi/mca/pml/ob1/pml_ob1_rdma.c index 4e64a8729e..3d109ca713 100644 --- a/ompi/mca/pml/ob1/pml_ob1_rdma.c +++ b/ompi/mca/pml/ob1/pml_ob1_rdma.c @@ -25,7 +25,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" #include "ompi/mca/bml/bml.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" #include "ompi/mca/mpool/mpool.h" #include "pml_ob1.h" #include "pml_ob1_rdma.h" diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 30da9ce5ee..cf98f0d084 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -323,7 +323,7 @@ static void 
mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ ORTE_ERROR_LOG(status); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } mca_pml_ob1_send_fin(recvreq->req_recv.req_base.req_proc, @@ -390,7 +390,7 @@ int mca_pml_ob1_recv_request_get_frag( mca_pml_ob1_rdma_frag_t* frag ) return OMPI_ERR_OUT_OF_RESOURCE; } else { ORTE_ERROR_LOG(rc); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } } @@ -420,7 +420,7 @@ static void mca_pml_ob1_recv_request_rget( if( OPAL_UNLIKELY(NULL == frag) ) { /* GLB - FIX */ ORTE_ERROR_LOG(rc); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* lookup bml datastructures */ @@ -442,7 +442,7 @@ static void mca_pml_ob1_recv_request_rget( frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } frag->rdma_hdr.hdr_rget = *hdr; frag->rdma_req = recvreq; diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 52f521ba53..80652faeb4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -158,7 +158,7 @@ mca_pml_ob1_match_completion_free( struct mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* signal request completion */ @@ -191,7 +191,7 @@ mca_pml_ob1_rndv_completion( mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* count bytes of user data actually delivered. 
As the rndv completion only @@ -278,7 +278,7 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* count bytes of user data actually delivered */ @@ -997,7 +997,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl, if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { /* TSW - FIX */ ORTE_ERROR_LOG(status); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc, @@ -1079,7 +1079,7 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag ) } else { /* TSW - FIX */ ORTE_ERROR_LOG(rc); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } } return OMPI_SUCCESS; @@ -1110,7 +1110,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq, if( OPAL_UNLIKELY(NULL == frag) ) { /* TSW - FIX */ ORTE_ERROR_LOG(rc); - orte_errmgr.abort(); + orte_errmgr.abort(-1, NULL); } /* setup fragment */ diff --git a/ompi/mca/pml/v/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c b/ompi/mca/pml/v/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c index a8658a9302..82f6dfdc69 100644 --- a/ompi/mca/pml/v/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c +++ b/ompi/mca/pml/v/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c @@ -22,6 +22,7 @@ #include "ompi/datatype/datatype_memcpy.h" #include +#include "orte/util/proc_info.h" #define sb mca_vprotocol_pessimist.sender_based diff --git a/orte/mca/gpr/Makefile.am b/ompi/mca/pubsub/Makefile.am similarity index 72% rename from orte/mca/gpr/Makefile.am rename to ompi/mca/pubsub/Makefile.am index d4f917d16d..bd10632353 100644 --- a/orte/mca/gpr/Makefile.am +++ b/ompi/mca/pubsub/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# Copyright (c) 2004-2007 The Trustees of Indiana 
University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University @@ -17,22 +17,22 @@ # # main library setup -noinst_LTLIBRARIES = libmca_gpr.la -libmca_gpr_la_SOURCES = +noinst_LTLIBRARIES = libmca_pubsub.la +libmca_pubsub_la_SOURCES = # header setup -nobase_orte_HEADERS = +nobase_ompi_HEADERS = # local files -headers = gpr.h gpr_types.h -libmca_gpr_la_SOURCES += $(headers) +headers = pubsub.h +libmca_pubsub_la_SOURCES += $(headers) # Conditionally install the header files if WANT_INSTALL_HEADERS -nobase_orte_HEADERS += $(headers) -ortedir = $(includedir)/openmpi/orte/mca/gpr +nobase_ompi_HEADERS += $(headers) +ompidir = $(includedir)/openmpi/ompi/mca/pubsub else -ortedir = $(includedir) +ompidir = $(includedir) endif include base/Makefile.am diff --git a/orte/mca/schema/base/Makefile.am b/ompi/mca/pubsub/base/Makefile.am similarity index 74% rename from orte/mca/schema/base/Makefile.am rename to ompi/mca/pubsub/base/Makefile.am index 973b2cf08d..b83f3d2c1e 100644 --- a/orte/mca/schema/base/Makefile.am +++ b/ompi/mca/pubsub/base/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. 
# Copyright (c) 2004-2005 The University of Tennessee and The University @@ -19,8 +19,8 @@ headers += \ base/base.h -libmca_schema_la_SOURCES += \ - base/schema_base_open.c \ - base/schema_base_close.c \ - base/schema_base_select.c \ - base/schema_base_fns.c +libmca_pubsub_la_SOURCES += \ + base/pubsub_base_open.c \ + base/pubsub_base_close.c \ + base/pubsub_base_select.c + diff --git a/ompi/mca/pubsub/base/base.h b/ompi/mca/pubsub/base/base.h new file mode 100644 index 0000000000..e4b00627bf --- /dev/null +++ b/ompi/mca/pubsub/base/base.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef OMPI_MCA_PUBSUB_BASE_H +#define OMPI_MCA_PUBSUB_BASE_H + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include "ompi/mca/pubsub/pubsub.h" + +/* + * Global functions for MCA overall PUBSUB + */ + +BEGIN_C_DECLS + +/** + * Initialize the PUBSUB MCA framework + * + * @retval OMPI_SUCCESS Upon success + * @retval OMPI_ERROR Upon failures + * + * This function is invoked during ompi_init(); + */ +OMPI_DECLSPEC int ompi_pubsub_base_open(void); + +/** + * Select an available component. 
+ * + * @retval OMPI_SUCCESS Upon Success + * @retval OMPI_NOT_FOUND If no component can be selected + * @retval OMPI_ERROR Upon other failure + * + */ +OMPI_DECLSPEC int ompi_pubsub_base_select(void); + +/** + * Finalize the PUBSUB MCA framework + * + * @retval OMPI_SUCCESS Upon success + * @retval OMPI_ERROR Upon failures + * + * This function is invoked during ompi_finalize(); + */ +OMPI_DECLSPEC int ompi_pubsub_base_close(void); + + +/* useful globals */ +OMPI_DECLSPEC extern int ompi_pubsub_base_output; +OMPI_DECLSPEC extern opal_list_t ompi_pubsub_base_components_available; +OMPI_DECLSPEC extern ompi_pubsub_base_component_t ompi_pubsub_base_selected_component; +OMPI_DECLSPEC extern ompi_pubsub_base_module_t ompi_pubsub; + +END_C_DECLS + +#endif /* OMPI_MCA_PUBSUB_BASE_H */ diff --git a/ompi/mca/pubsub/base/pubsub_base_close.c b/ompi/mca/pubsub/base/pubsub_base_close.c new file mode 100644 index 0000000000..182468f08e --- /dev/null +++ b/ompi/mca/pubsub/base/pubsub_base_close.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "ompi/mca/pubsub/pubsub.h" +#include "ompi/mca/pubsub/base/base.h" + +int ompi_pubsub_base_close(void) +{ + /* Close the selected component */ + if( NULL != ompi_pubsub.finalize ) { + ompi_pubsub.finalize(); + } + + /* Close all available modules that are open */ + mca_base_components_close(ompi_pubsub_base_output, + &ompi_pubsub_base_components_available, + NULL); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/pubsub/base/pubsub_base_open.c b/ompi/mca/pubsub/base/pubsub_base_open.c new file mode 100644 index 0000000000..ad61a8b394 --- /dev/null +++ b/ompi/mca/pubsub/base/pubsub_base_open.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "ompi/mca/pubsub/pubsub.h" +#include "ompi/mca/pubsub/base/base.h" + +#include "ompi/mca/pubsub/base/static-components.h" + +/* + * Globals + */ +OMPI_DECLSPEC int ompi_pubsub_base_output = -1; +OMPI_DECLSPEC ompi_pubsub_base_module_t ompi_pubsub; +opal_list_t ompi_pubsub_base_components_available; +ompi_pubsub_base_component_t ompi_pubsub_base_selected_component; + +/** + * Function for finding and opening either all MCA components, + * or the one that was specifically requested via a MCA parameter. + */ +int ompi_pubsub_base_open(void) +{ + /* Debugging/Verbose output */ + ompi_pubsub_base_output = opal_output_open(NULL); + + /* Open up all available components */ + if (OPAL_SUCCESS != + mca_base_components_open("pubsub", + ompi_pubsub_base_output, + mca_pubsub_base_static_components, + &ompi_pubsub_base_components_available, + true)) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/pubsub/base/pubsub_base_select.c b/ompi/mca/pubsub/base/pubsub_base_select.c new file mode 100644 index 0000000000..e13e92cb2d --- /dev/null +++ b/ompi/mca/pubsub/base/pubsub_base_select.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/base/mca_base_component_repository.h" + +#include "ompi/mca/pubsub/pubsub.h" +#include "ompi/mca/pubsub/base/base.h" + + +int ompi_pubsub_base_select(void) +{ + opal_list_item_t *item; + mca_base_component_list_item_t *cli; + ompi_pubsub_base_component_t *component, *best_component = NULL; + ompi_pubsub_base_module_t *module, *best_module = NULL; + int priority, best_priority = -1; + int rc; + + /* Query all the opened components and see if they want to run */ + + for (item = opal_list_get_first(&ompi_pubsub_base_components_available); + opal_list_get_end(&ompi_pubsub_base_components_available) != item; + item = opal_list_get_next(item)) { + cli = (mca_base_component_list_item_t *) item; + component = (ompi_pubsub_base_component_t *) cli->cli_component; + + OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: querying component %s", + component->pubsub_version.mca_component_name)); + + /* Call the component's init function and see if it wants to be + selected */ + + module = component->pubsub_init(&priority); + + /* If we got a non-NULL module back, then the component wants + to be considered for selection */ + + if (NULL != module) { + /* If this is the best one, save it */ + if (priority > best_priority) { + + /* If there was a previous best one, finalize */ + if (NULL != best_module) { + + OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: found better component - finalizing component %s", + best_component->pubsub_version.mca_component_name)); + + best_module->finalize(); + } + + /* Save the new best one */ + best_module = module; + best_component = component; + + /* update the best priority */ + best_priority = priority; + } else { + + 
OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: component %s does did not win the election", + component->pubsub_version.mca_component_name)); + + if (NULL == module->finalize) { + opal_output(ompi_pubsub_base_output, + "It appears you are the victim of a stale library - please delete your installation lib directory and reinstall"); + } else { + module->finalize(); + } + } + } + } + + /* If we didn't find one to select, barf */ + + if (NULL == best_component) { + return OMPI_ERROR; + } + + OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: component %s was selected", + best_component->pubsub_version.mca_component_name)); + + /* We have happiness -- save the component and module for later + usage */ + + ompi_pubsub = *best_module; + ompi_pubsub_base_selected_component = *best_component; + + /* unload all components that were not selected */ + item = opal_list_get_first(&ompi_pubsub_base_components_available); + while(item != opal_list_get_end(&ompi_pubsub_base_components_available)) { + opal_list_item_t* next = opal_list_get_next(item); + ompi_pubsub_base_component_t* component; + cli = (mca_base_component_list_item_t *) item; + component = (ompi_pubsub_base_component_t *) cli->cli_component; + if(component != best_component) { + + OPAL_OUTPUT_VERBOSE((10, ompi_pubsub_base_output, + "ompi:pubsub:base:select: module %s unloaded", + component->pubsub_version.mca_component_name)); + + mca_base_component_repository_release((mca_base_component_t *) component); + opal_list_remove_item(&ompi_pubsub_base_components_available, item); + OBJ_RELEASE(item); + } + item = next; + } + + /* init the selected module */ + if (NULL != ompi_pubsub.init) { + if (OMPI_SUCCESS != (rc = ompi_pubsub.init())) { + return rc; + } + } + return OMPI_SUCCESS; +} diff --git a/ompi/mca/pubsub/orte/Makefile.am b/ompi/mca/pubsub/orte/Makefile.am new file mode 100644 index 0000000000..4803e735bb --- /dev/null +++ 
b/ompi/mca/pubsub/orte/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +dist_pkgdata_DATA = help-ompi-pubsub-orte.txt + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if OMPI_BUILD_pubsub_orte_DSO +component_noinst = +component_install = mca_pubsub_orte.la +else +component_noinst = libmca_pubsub_orte.la +component_install = +endif + +local_sources = \ + pubsub_orte.c \ + pubsub_orte.h \ + pubsub_orte_component.c + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pubsub_orte_la_SOURCES = $(local_sources) +mca_pubsub_orte_la_LDFLAGS = -module -avoid-version $(pubsub_orte_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pubsub_orte_la_SOURCES = $(local_sources) +libmca_pubsub_orte_la_LIBADD = $(pubsub_orte_LIBS) +libmca_pubsub_orte_la_LDFLAGS = -module -avoid-version $(pubsub_orte_LDFLAGS) + diff --git a/orte/mca/errmgr/hnp/configure.params b/ompi/mca/pubsub/orte/configure.params similarity index 100% rename from orte/mca/errmgr/hnp/configure.params rename to ompi/mca/pubsub/orte/configure.params diff --git a/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt b/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt new file mode 100644 index 0000000000..b00908c65f --- /dev/null +++ b/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt @@ -0,0 +1,43 @@ +# -*- text -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open MPI. +# +[pubsub-orte:no-server] +Process rank %ld attempted to %s a global ompi_server that +could not be contacted. This is typically caused by either not +specifying the contact info for the server, or by the server not +currently executing. If you did specify the contact info for a +server, please check to see that the server is running and start +it again (or have your sys admin start it) if it isn't. + +[pubsub-orte:unknown-order] +Process rank %ld attempted to lookup a value but provided an +unrecognized order parameter. Order parameters are used to tell Open +MPI if it should first look for the requested value locally (i.e., from +the current job) or from a global ompi_server. Accepted order +parameters are "local" and "global", respectively. + +[pubsub-orte:too-many-orders] +Process rank %ld attempted to lookup a value but provided too many +order parameters (%ld found). Order parameters are used to tell +Open MPI if it should first look for the requested value locally +(i.e., from the current job) or from a global ompi_server. Accepted +order parameters are "local" and "global", respectively, and each can +only be specified once. diff --git a/ompi/mca/pubsub/orte/pubsub_orte.c b/ompi/mca/pubsub/orte/pubsub_orte.c new file mode 100644 index 0000000000..7ad1ec7341 --- /dev/null +++ b/ompi/mca/pubsub/orte/pubsub_orte.c @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include +#include +#include + +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/util/opal_getcwd.h" + +#include "opal/dss/dss.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/grpcomm/grpcomm.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/routed/routed.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_data_server.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/proc/proc.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/info/info.h" +#include "ompi/runtime/ompi_module_exchange.h" + +#include "ompi/mca/pubsub/base/base.h" +#include "pubsub_orte.h" + +/* Establish contact with the server + * + * NOTE: we do not do this automatically during init to avoid + * forcing every process to pay the time penalty during MPI_Init + * when only a few, if any, will ever call pub/lookup/unpub. In + * addition, those that -do- call these functions may well only + * use local (as opposed to global) storage, and hence will have + * no need to talk to the server, even though a sys admin may + * have set one up. 
So we do a lazy setup of the server contact + * info - it only gets setup the first time we call a function + * that wants to talk to the global server + */ +static bool server_setup=false; + +static void setup_server(void) +{ + opal_buffer_t buf; + orte_rml_cmd_flag_t command=ORTE_RML_UPDATE_CMD; + int rc; + + if (NULL == mca_pubsub_orte_component.server_uri) { + /* if the contact info for the server is NULL, then there + * is nothing to do + */ + server_setup = true; + return; + } + + /* setup the route to the server using the + * selected routed component. This allows us + * to tell the local daemon how to reach the + * server, so we can still only have one connection + * open! To do this, we need to insert the server's + * uri into a buffer + */ + OBJ_CONSTRUCT(&buf, opal_buffer_t); + opal_dss.pack(&buf, &command, 1, ORTE_RML_CMD); + opal_dss.pack(&buf, &mca_pubsub_orte_component.server_uri, 1, OPAL_STRING); + if (ORTE_SUCCESS != (rc = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, &buf))) { + ORTE_ERROR_LOG(rc); + server_setup = true; + return; + } + OBJ_DESTRUCT(&buf); + + /* extract the server's name */ + orte_rml_base_parse_uris(mca_pubsub_orte_component.server_uri, &mca_pubsub_orte_component.server, NULL); + + /* flag the server as found */ + mca_pubsub_orte_component.server_found = true; + + /* flag setup as completed */ + server_setup = true; +} + +/* + * Init the module + */ +static int init(void) +{ + return OMPI_SUCCESS; +} + +/* + * publish the port_name for the specified service_name. This will + * be published under our process name, so only we will be allowed + * to remove it later. 
+ */
+static int publish ( char *service_name, ompi_info_t *info, char *port_name )
+{
+    int rc, ret, flag;
+    bool global_scope = false;
+    orte_process_name_t *info_host;
+    opal_buffer_t buf;
+    orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_PUBLISH;
+    orte_std_cntr_t cnt;
+
+    ompi_info_get_bool(info, "ompi_global_scope", &global_scope, &flag);
+
+    if (!global_scope) {
+        /* if the scope is not global, then store the value on the HNP */
+        info_host = ORTE_PROC_MY_HNP;
+    } else {
+        /* has the server been setup yet? */
+        if (!server_setup) {
+            setup_server();
+        }
+        /* store the value on the global ompi_server, but error
+         * if that server wasn't contacted
+         */
+        if (!mca_pubsub_orte_component.server_found) {
+            opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server",
+                           true, (long)ORTE_PROC_MY_NAME->vpid, "publish to");
+            return OMPI_ERR_NOT_FOUND;
+        }
+        info_host = &mca_pubsub_orte_component.server;
+    }
+
+    /* construct the buffer */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+
+    /* pack the publish command */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* pack the service name */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &service_name, 1, OPAL_STRING))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* pack the port name */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &port_name, 1, OPAL_STRING))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* send the data */
+    if (0 > (rc = orte_rml.send_buffer(info_host, &buf, ORTE_RML_TAG_DATA_SERVER, 0))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    OBJ_DESTRUCT(&buf);
+
+    /* get the answer */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+    if (0 > (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, ORTE_RML_TAG_DATA_CLIENT, 0))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* unpack the result */
+    cnt = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &ret, &cnt, OPAL_INT))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    rc = ret;
+
+
+CLEANUP:
+    OBJ_DESTRUCT(&buf);
+
+    return rc;
+}
+
+/* scopes that a lookup may consult; NONE marks an unused slot */
+enum { NONE, LOCAL, GLOBAL };
+
+/*
+ * lookup the port_name published under service_name.
+ *
+ * The scopes named by the "ompi_lookup_order" info key (default:
+ * local, then global) are queried in order. Returns the port name
+ * unpacked from the first reply that carries one, or NULL when the
+ * order is invalid, a requested global server could not be reached,
+ * a communication/packing step failed, or no queried scope knows the
+ * service.
+ */
+static char* lookup ( char *service_name, ompi_info_t *info )
+{
+    orte_process_name_t *info_host;
+    opal_buffer_t buf;
+    orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_LOOKUP;
+    orte_std_cntr_t cnt=0;
+    char *port_name=NULL;
+    int ret, rc, flag, i;
+    char value[256], **tokens, *ptr;
+    int lookup[2] = { LOCAL, GLOBAL };
+    size_t num_tokens;
+
+    /* Look in the MPI_Info (ompi_info_t*) for the key
+     * "ompi_lookup_order". Acceptable values are:
+     *
+     * - "local" -- only check the local scope
+     * - "global" -- only check the global scope
+     * - "local,global" -- check the local scope first, then check the
+     *   global scope
+     * - "global,local" -- check the global scope first, then check the
+     *   local scope
+     *
+     * Give a little leeway in terms of whitespace in the value.
+     *
+     * The lookup[2] array will contain the results: lookup[0] is the
+     * first scope to check, lookup[1] is the 2nd. Either value may
+     * be NONE, LOCAL, or GLOBAL. If both are NONE, clearly that's an
+     * error. :-)
+     */
+    ompi_info_get(info, "ompi_lookup_order", sizeof(value) - 1, value, &flag);
+    if (flag) {
+        ptr = &value[0];
+        /* test the bound before dereferencing, and hand isspace() an
+         * unsigned char value as <ctype.h> requires
+         */
+        while ((ptr - value) < (int)sizeof(value) &&
+               isspace((unsigned char)*ptr)) {
+            ++ptr;
+        }
+        if (ptr - value < (int)sizeof(value)) {
+            tokens = opal_argv_split(ptr, ',');
+            if (NULL != tokens) {
+                if ((num_tokens = opal_argv_count(tokens)) > 2) {
+                    /* too many values in the comma-delimited list */
+                    opal_show_help("help-ompi-pubsub-orte.txt",
+                                   "pubsub-orte:too-many-orders",
+                                   true, (long)ORTE_PROC_MY_NAME->vpid,
+                                   (long)num_tokens);
+                    opal_argv_free(tokens);
+                    return NULL;
+                }
+                for (i = 0; i < 2; ++i) {
+                    /* only index entries that exist - never read past
+                     * the end of the token array
+                     */
+                    if (i < (int)num_tokens && NULL != tokens[i]) {
+                        if (0 == strcasecmp(tokens[i], "local")) {
+                            lookup[i] = LOCAL;
+                        } else if (0 == strcasecmp(tokens[i], "global")) {
+                            lookup[i] = GLOBAL;
+                        } else {
+                            /* unrecognized value -- that's an error */
+                            opal_show_help("help-ompi-pubsub-orte.txt",
+                                           "pubsub-orte:unknown-order",
+                                           true, (long)ORTE_PROC_MY_NAME->vpid);
+                            opal_argv_free(tokens);
+                            return NULL;
+                        }
+                    } else {
+                        lookup[i] = NONE;
+                    }
+                }
+                opal_argv_free(tokens);
+            }
+        }
+    }
+
+    /* check for error situations */
+
+    if (NONE == lookup[0]) {
+        /* if the user provided an info key, then we at least must
+         * be given one place to look
+         */
+        opal_show_help("help-ompi-pubsub-orte.txt",
+                       "pubsub-orte:unknown-order",
+                       true, (long)ORTE_PROC_MY_NAME->vpid);
+        return NULL;
+    }
+
+    if (GLOBAL == lookup[0]) {
+        /* has the server been setup yet? */
+        if (!server_setup) {
+            setup_server();
+        }
+
+        if (!mca_pubsub_orte_component.server_found) {
+            /* if we were told to look global first and no server is
+             * present, then that is an error
+             */
+            opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server",
+                           true, (long)ORTE_PROC_MY_NAME->vpid, "lookup from");
+            return NULL;
+        }
+    }
+
+    /* go find the value */
+    for (i=0; i < 2; i++) {
+        if (LOCAL == lookup[i]) {
+            /* if the scope is local, then lookup the value on the HNP */
+            info_host = ORTE_PROC_MY_HNP;
+        } else if (GLOBAL == lookup[i]) {
+            /* has the server been setup yet? */
+            if (!server_setup) {
+                setup_server();
+            }
+            /* lookup the value on the global ompi_server, but error
+             * if that server wasn't contacted
+             */
+            if (!mca_pubsub_orte_component.server_found) {
+                opal_show_help("help-ompi-pubsub-orte.txt",
+                               "pubsub-orte:no-server",
+                               true, (long)ORTE_PROC_MY_NAME->vpid,
+                               "lookup from");
+                return NULL;
+            }
+            info_host = &mca_pubsub_orte_component.server;
+        } else if (NONE == lookup[i]) {
+            /* the user named only one scope - an empty second slot
+             * is legal, so there is simply nothing further to try
+             */
+            continue;
+        } else {
+            /* unknown host! */
+            opal_show_help("help-ompi-pubsub-orte.txt",
+                           "pubsub-orte:unknown-order",
+                           true, (long)ORTE_PROC_MY_NAME->vpid);
+            return NULL;
+        }
+
+        /* go look it up */
+        /* construct the buffer */
+        OBJ_CONSTRUCT(&buf, opal_buffer_t);
+
+        /* pack the lookup command */
+        if (ORTE_SUCCESS != (ret = opal_dss.pack(&buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+
+        /* pack the service name */
+        if (ORTE_SUCCESS != (ret = opal_dss.pack(&buf, &service_name, 1, OPAL_STRING))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+
+        /* send the cmd */
+        if (0 > (ret = orte_rml.send_buffer(info_host, &buf, ORTE_RML_TAG_DATA_SERVER, 0))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+        OBJ_DESTRUCT(&buf);
+
+        /* get the answer */
+        OBJ_CONSTRUCT(&buf, opal_buffer_t);
+        if (0 > (ret = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, ORTE_RML_TAG_DATA_CLIENT, 0))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+
+        /* unpack the return code */
+        cnt = 1;
+        if (ORTE_SUCCESS != (ret = opal_dss.unpack(&buf, &rc, &cnt, OPAL_INT))) {
+            ORTE_ERROR_LOG(ret);
+            goto CLEANUP;
+        }
+
+        if (ORTE_SUCCESS == rc) {
+            /* the server was able to lookup the port - unpack the port name */
+            cnt=1;
+            if (ORTE_SUCCESS != (ret = opal_dss.unpack(&buf, &port_name, &cnt, OPAL_STRING))) {
+                ORTE_ERROR_LOG(ret);
+                goto CLEANUP;
+            }
+
+            if (NULL != port_name) {
+                /* got an answer - return it */
+                OBJ_DESTRUCT(&buf);
+                return port_name;
+            }
+        }
+
+        /* if we didn't get a port_name, then continue */
+        OBJ_DESTRUCT(&buf);
+    }
+
+    /* only get here if we tried both options and failed - since the
+     * buffer will already have been cleaned up, just return
+     */
+    return NULL;
+
+CLEANUP:
+    OBJ_DESTRUCT(&buf);
+
+    return NULL;
+
+}
+
+/*
+ * delete the entry. Only the process who has published
+ * the service_name has the right to remove this
+ * service - the server will verify and report the result
+ *
+ * Returns OMPI_ERR_NOT_FOUND when the global scope was requested but
+ * no server could be contacted; otherwise the pack/send/recv error,
+ * or the result code unpacked from the server's reply.
+ */
+static int unpublish ( char *service_name, ompi_info_t *info )
+{
+    int rc, ret, flag;
+    /* default to the local scope when the info key is absent, exactly
+     * as publish() does - the value must not be read uninitialized
+     */
+    bool global_scope = false;
+    orte_process_name_t *info_host;
+    opal_buffer_t buf;
+    orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_UNPUBLISH;
+    orte_std_cntr_t cnt;
+
+    ompi_info_get_bool(info, "ompi_global_scope", &global_scope, &flag);
+
+    if (!global_scope) {
+        /* if the scope is not global, then unpublish the value from the HNP */
+        info_host = ORTE_PROC_MY_HNP;
+    } else {
+        /* has the server been setup yet? */
+        if (!server_setup) {
+            setup_server();
+        }
+        /* unpublish the value from the global ompi_server, but error
+         * if that server wasn't contacted
+         */
+        if (!mca_pubsub_orte_component.server_found) {
+            /* pass the same arguments for this help topic as
+             * publish() and lookup() do
+             */
+            opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server",
+                           true, (long)ORTE_PROC_MY_NAME->vpid, "unpublish from");
+            return OMPI_ERR_NOT_FOUND;
+        }
+        info_host = &mca_pubsub_orte_component.server;
+    }
+
+    /* construct the buffer */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+
+    /* pack the unpublish command */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* pack the service name */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &service_name, 1, OPAL_STRING))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* send the command */
+    if (0 > (rc = orte_rml.send_buffer(info_host, &buf, ORTE_RML_TAG_DATA_SERVER, 0))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    OBJ_DESTRUCT(&buf);
+
+    /* get the answer */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+    if (0 > (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, ORTE_RML_TAG_DATA_CLIENT, 0))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* unpack the result */
+    cnt = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &ret, &cnt, OPAL_INT))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    rc = ret;
+
+CLEANUP:
+    OBJ_DESTRUCT(&buf);
+
+    return rc;
+}
+
+
+/*
+ * finalize the module
+ */
+static int finalize(void)
+{
+    return OMPI_SUCCESS;
+}
+
+/*
+ * instantiate the module
+ */
+ompi_pubsub_base_module_t ompi_pubsub_orte_module = {
+    init,
+    publish,
+    unpublish,
+    lookup,
+    finalize
+};
+
+
diff --git a/ompi/mca/pubsub/orte/pubsub_orte.h b/ompi/mca/pubsub/orte/pubsub_orte.h
new file mode 100644
index 0000000000..8f4acdff0d
--- /dev/null
+++ b/ompi/mca/pubsub/orte/pubsub_orte.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef OMPI_PUBSUB_ORTE_H
+#define OMPI_PUBSUB_ORTE_H
+
+#include "ompi_config.h"
+
+#include "orte/types.h"
+
+#include "ompi/mca/pubsub/pubsub.h"
+
+BEGIN_C_DECLS
+
+/*
+ * Extend the pubsub component to hold some useful
+ * values for this component: the contact information for the
+ * global ompi_server and whether a route to it has been set up.
+ */
+typedef struct {
+    ompi_pubsub_base_component_t super;   /* base pubsub component */
+    orte_process_name_t server;           /* name of the ompi_server, parsed from server_uri by the module's setup_server() */
+    char *server_uri;                     /* value of the component's "server" MCA string parameter; NULL when unset; freed at component close */
+    bool server_found;                    /* false at component init; set to true by setup_server() once the route is installed */
+} ompi_pubsub_orte_component_t;
+
+/* access to module */
+extern ompi_pubsub_base_module_t ompi_pubsub_orte_module;
+
+/* access to component so we can get to the locally
+ * global values
+ */
+OMPI_MODULE_DECLSPEC extern ompi_pubsub_orte_component_t mca_pubsub_orte_component;
+
+END_C_DECLS
+
+#endif /* OMPI_PUBSUB_ORTE_H */
diff --git a/ompi/mca/pubsub/orte/pubsub_orte_component.c b/ompi/mca/pubsub/orte/pubsub_orte_component.c
new file mode 100644
index 0000000000..a33263a8bc
--- /dev/null
+++ b/ompi/mca/pubsub/orte/pubsub_orte_component.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2007 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2006 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "ompi/constants.h"
+
+#include "pubsub_orte.h"
+
+static int pubsub_orte_component_open(void);
+static int pubsub_orte_component_close(void);
+static ompi_pubsub_base_module_t*
+pubsub_orte_component_init( int* priority );
+
+ompi_pubsub_orte_component_t mca_pubsub_orte_component = {
+    {
+        /* First, the mca_base_component_t struct containing meta
+           information about the component itself */
+
+        {
+            /* Indicate that we are a pubsub v1.0.0 component (which also implies
+               a specific MCA version) */
+
+            OMPI_PUBSUB_BASE_VERSION_1_0_0,
+
+            "orte", /* MCA component name */
+            OMPI_MAJOR_VERSION, /* MCA component major version */
+            OMPI_MINOR_VERSION, /* MCA component minor version */
+            OMPI_RELEASE_VERSION, /* MCA component release version */
+            pubsub_orte_component_open, /* component open */
+            pubsub_orte_component_close /* component close */
+        },
+
+        /* Next the MCA v1.0.0 component meta data */
+
+        {
+            /* This component is checkpoint ready */
+            MCA_BASE_METADATA_PARAM_CHECKPOINT
+        },
+
+        pubsub_orte_component_init, /* component init */
+    }
+};
+
+/*
+ * Component open: nothing to set up here; always returns OMPI_SUCCESS.
+ */
+int pubsub_orte_component_open(void)
+{
+    return OMPI_SUCCESS;
+}
+/*
+ * Component close: release the server contact string (server_uri) if
+ * one is held and reset the pointer to NULL so it cannot be freed
+ * twice. Always returns OMPI_SUCCESS.
+ */
+int pubsub_orte_component_close(void)
+{
+    if (NULL != mca_pubsub_orte_component.server_uri) {
+        free(mca_pubsub_orte_component.server_uri);
+        mca_pubsub_orte_component.server_uri = NULL;
+    }
+    return OMPI_SUCCESS;
+}
+/*
+ * Component init: register the string MCA parameter "server" (with
+ * server_uri passed as its storage location and a NULL default), mark
+ * the server as not yet found, report a priority of 50 through
+ * *priority, and return the ORTE pubsub module.
+ */
+ompi_pubsub_base_module_t* pubsub_orte_component_init(int* priority)
+{
+    mca_base_component_t *comp = &mca_pubsub_orte_component.super.pubsub_version;
+
+    mca_base_param_reg_string(comp, "server",
+                              "Contact info for ompi_server for publish/subscribe operations",
+                              false, false, NULL,
+                              &mca_pubsub_orte_component.server_uri);
+
+    mca_pubsub_orte_component.server_found = false;
+
+    *priority = 50;
+
+    return &ompi_pubsub_orte_module;
+}
diff --git a/ompi/mca/pubsub/pubsub.h b/ompi/mca/pubsub/pubsub.h
new file mode 100644
index
0000000000..9e310bc757
--- /dev/null
+++ b/ompi/mca/pubsub/pubsub.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/**
+ * @file
+ *
+ * Publish/Subscribe (PUBSUB) Interface: publish, unpublish and
+ * lookup of service-name to port-name associations
+ *
+ */
+
+#ifndef OMPI_MCA_PUBSUB_H
+#define OMPI_MCA_PUBSUB_H
+
+#include "ompi_config.h"
+
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+
+#include "opal/class/opal_object.h"
+
+#include "ompi/info/info.h"
+#include "ompi/communicator/communicator.h"
+
+BEGIN_C_DECLS
+
+/*
+ * Initialize a module
+ */
+typedef int (*ompi_pubsub_base_module_init_fn_t)(void);
+
+/*
+ * Publish a data item: associate the given port with the given
+ * service name; info carries the caller's MPI_Info hints.
+ * Returns an OMPI status code.
+ */
+typedef int (*ompi_pubsub_base_module_publish_fn_t)(char *service, ompi_info_t *info, char *port);
+
+/*
+ * Unpublish a data item previously published under the given
+ * service name. Returns an OMPI status code.
+ */
+typedef int (*ompi_pubsub_base_module_unpublish_fn_t)(char *service, ompi_info_t *info);
+
+/*
+ * Lookup a data item: returns the port published under the given
+ * service name, or NULL if none could be obtained.
+ */
+typedef char* (*ompi_pubsub_base_module_lookup_fn_t)(char *service, ompi_info_t *info);
+
+/*
+ * Finalize a module
+ */
+typedef int (*ompi_pubsub_base_module_finalize_fn_t)(void);
+
+/**
+ * Structure for PUBSUB v1.0.0 modules
+ */
+struct ompi_pubsub_base_module_1_0_0_t {
+    /** Initialization Function */
+    ompi_pubsub_base_module_init_fn_t init;
+    /* Publish */
+    ompi_pubsub_base_module_publish_fn_t publish;
+    /* Unpublish */
+    ompi_pubsub_base_module_unpublish_fn_t unpublish;
+    /* Lookup */
+    ompi_pubsub_base_module_lookup_fn_t lookup;
+    /* finalize */
+    ompi_pubsub_base_module_finalize_fn_t finalize;
+};
+typedef struct ompi_pubsub_base_module_1_0_0_t ompi_pubsub_base_module_1_0_0_t;
+typedef struct ompi_pubsub_base_module_1_0_0_t ompi_pubsub_base_module_t;
+
+/* module instance through which the MPI name-service bindings call publish/unpublish/lookup */
+OMPI_DECLSPEC extern ompi_pubsub_base_module_t ompi_pubsub;
+
+/* component init: returns the component's module and reports its priority through *priority */
+typedef struct ompi_pubsub_base_module_1_0_0_t*
+(*ompi_pubsub_base_component_init_fn_t)(int *priority);
+
+
+/**
+ * Structure for PUBSUB v1.0.0 components.
+ */
+struct ompi_pubsub_base_component_1_0_0_t {
+    /** MCA base component */
+    mca_base_component_t pubsub_version;
+    /** MCA base data */
+    mca_base_component_data_1_0_0_t pubsub_data;
+    /* component selection */
+    ompi_pubsub_base_component_init_fn_t pubsub_init;
+};
+typedef struct ompi_pubsub_base_component_1_0_0_t ompi_pubsub_base_component_1_0_0_t;
+typedef struct ompi_pubsub_base_component_1_0_0_t ompi_pubsub_base_component_t;
+
+/**
+ * Macro for use in components that are of type PUBSUB v1.0.0
+ */
+#define OMPI_PUBSUB_BASE_VERSION_1_0_0 \
+  /* PUBSUB v1.0 is chained to MCA v1.0 */ \
+  MCA_BASE_VERSION_1_0_0, \
+  /* PUBSUB v1.0 */ \
+  "pubsub", 1, 0, 0
+
+
+END_C_DECLS
+
+#endif /* OMPI_MCA_PUBSUB_H */
diff --git a/ompi/mpi/c/close_port.c b/ompi/mpi/c/close_port.c index ba067bf750..e6fba135b6 100644 --- a/ompi/mpi/c/close_port.c +++ b/ompi/mpi/c/close_port.c @@ -19,6 +19,8 @@ #include #include "ompi/mpi/c/bindings.h" +#include "ompi/mca/dpm/dpm.h" + #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Close_port = PMPI_Close_port @@ -33,6 +35,7 @@ static const char FUNC_NAME[] = "MPI_Close_port"; int MPI_Close_port(char *port_name) { + int ret; OPAL_CR_NOOP_PROGRESS(); @@ -44,13 +47,7 @@ int MPI_Close_port(char *port_name) FUNC_NAME); } - /* - * since the port_name is our own process_name_t structure, - * we do not have to close anything or free a pointer. - * This function is therefore just a dummy function - * and fully implemented. I love these type functions, - * we should have more of them :-).
- */ - - return MPI_SUCCESS; + ret = ompi_dpm.close_port(port_name); + + OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_accept.c b/ompi/mpi/c/comm_accept.c index 438336665b..48619cfaf3 100644 --- a/ompi/mpi/c/comm_accept.c +++ b/ompi/mpi/c/comm_accept.c @@ -21,6 +21,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -93,10 +94,10 @@ int MPI_Comm_accept(char *port_name, MPI_Info info, int root, * The two leaders will figure this out later. However, we need the tag. */ if ( rank == root ) { - tmp_port = ompi_parse_port(port_name, &tag); + tmp_port = ompi_dpm.parse_port(port_name, &tag); free (tmp_port); } - rc = ompi_comm_connect_accept (comm, root, NULL, send_first, &newcomp, tag); + rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, &newcomp, tag); *newcomm = newcomp; OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME ); diff --git a/ompi/mpi/c/comm_connect.c b/ompi/mpi/c/comm_connect.c index 5889a8affc..63f79b4439 100644 --- a/ompi/mpi/c/comm_connect.c +++ b/ompi/mpi/c/comm_connect.c @@ -21,9 +21,13 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" -#include "orte/mca/ns/ns.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" +#include "orte/util/name_fns.h" +#include "opal/dss/dss.h" +#include "orte/runtime/orte_globals.h" + #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Comm_connect = PMPI_Comm_connect #endif @@ -41,7 +45,7 @@ int MPI_Comm_connect(char *port_name, MPI_Info info, int root, int rank, rc; int send_first=1; /* yes, we are the active part in this game */ ompi_communicator_t *newcomp=MPI_COMM_NULL; - orte_process_name_t *port_proc_name=NULL; + orte_process_name_t port_proc_name; char *tmp_port=NULL; orte_rml_tag_t tag; @@ -97,18 +101,18 @@ int MPI_Comm_connect(char *port_name, MPI_Info info, int root, * structure. 
*/ if ( rank == root ) { - tmp_port = ompi_parse_port (port_name, &tag); - if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_process_name(&port_proc_name, tmp_port))) { - return rc; + tmp_port = ompi_dpm.parse_port (port_name, &tag); + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_process_name(&port_proc_name, tmp_port))) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_PORT, FUNC_NAME); } - if ( NULL == port_proc_name ) { + if ( OPAL_EQUAL == opal_dss.compare(&port_proc_name, ORTE_NAME_INVALID, ORTE_NAME) ) { *newcomm = MPI_COMM_NULL; return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_PORT, FUNC_NAME); } free (tmp_port); } - rc = ompi_comm_connect_accept (comm, root, port_proc_name, send_first, + rc = ompi_dpm.connect_accept (comm, root, &port_proc_name, send_first, &newcomp, tag); *newcomm = newcomp; diff --git a/ompi/mpi/c/comm_disconnect.c b/ompi/mpi/c/comm_disconnect.c index 63969e1ec2..fa91669796 100644 --- a/ompi/mpi/c/comm_disconnect.c +++ b/ompi/mpi/c/comm_disconnect.c @@ -30,6 +30,9 @@ #include "ompi/mpi/c/profile/defines.h" #endif +#include "ompi/mca/dpm/dpm.h" + + static const char FUNC_NAME[] = "MPI_Comm_disconnect"; @@ -54,10 +57,7 @@ int MPI_Comm_disconnect(MPI_Comm *comm) OPAL_CR_ENTER_LIBRARY(); if ( OMPI_COMM_IS_DYNAMIC(*comm)) { - ompi_comm_disconnect_obj *dobj; - - dobj = ompi_comm_disconnect_init (*comm); - ompi_comm_disconnect_waitall(1, &dobj); + ompi_dpm.disconnect (*comm); } else { (*comm)->c_coll.coll_barrier(*comm, (*comm)->c_coll.coll_barrier_module); diff --git a/ompi/mpi/c/comm_join.c b/ompi/mpi/c/comm_join.c index 318d675f0c..047e295610 100644 --- a/ompi/mpi/c/comm_join.c +++ b/ompi/mpi/c/comm_join.c @@ -33,8 +33,10 @@ #endif #include "ompi/mpi/c/bindings.h" -#include "orte/mca/ns/ns.h" #include "ompi/proc/proc.h" +#include "ompi/mca/dpm/dpm.h" + +#include "orte/util/name_fns.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Comm_join = PMPI_Comm_join @@ -51,7 +53,8 @@ static int ompi_socket_recv (int fd, char 
*buf, int len ); int MPI_Comm_join(int fd, MPI_Comm *intercomm) { - int rc, tag=OMPI_COMM_JOIN_TAG; + int rc; + orte_rml_tag_t tag=OMPI_COMM_JOIN_TAG; size_t size; uint32_t len, rlen, llen, lrlen; int send_first=1; @@ -59,7 +62,7 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) ompi_proc_t **myproc=NULL; ompi_communicator_t *newcomp; - orte_process_name_t *port_proc_name=NULL; + orte_process_name_t port_proc_name; if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -76,7 +79,7 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) Need to determine somehow how to avoid a potential deadlock here. */ myproc = ompi_proc_self (&size); - if (ORTE_SUCCESS != (rc = orte_ns.get_proc_name_string (&name, &(myproc[0]->proc_name)))) { + if (ORTE_SUCCESS != (rc = orte_util_convert_process_name_to_string (&name, &(myproc[0]->proc_name)))) { OPAL_CR_EXIT_LIBRARY(); return rc; } @@ -100,17 +103,16 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) ompi_socket_send (fd, name, llen); ompi_socket_recv (fd, rname, lrlen); - if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_process_name(&port_proc_name, rname))) { + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_process_name(&port_proc_name, rname))) { OPAL_CR_EXIT_LIBRARY(); - return rc; + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_PORT, FUNC_NAME); } - rc = ompi_comm_connect_accept (MPI_COMM_SELF, 0, port_proc_name, - send_first, &newcomp, tag); + rc = ompi_dpm.connect_accept (MPI_COMM_SELF, 0, &port_proc_name, + send_first, &newcomp, tag); free ( name ); free ( rname); - free ( port_proc_name ); free ( myproc ); *intercomm = newcomp; diff --git a/ompi/mpi/c/comm_spawn.c b/ompi/mpi/c/comm_spawn.c index 6e28409c94..50700dc2fc 100644 --- a/ompi/mpi/c/comm_spawn.c +++ b/ompi/mpi/c/comm_spawn.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -22,6 +22,7 @@ #include "opal/util/show_help.h" #include "ompi/info/info.h" #include "ompi/mpi/c/bindings.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -39,13 +40,14 @@ int MPI_Comm_spawn(char *command, char **argv, int maxprocs, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *intercomm, int *array_of_errcodes) { - int rank, rc, i; - int send_first=0; /* we wait to be contacted */ + int rank, rc=OMPI_SUCCESS, i, flag; + bool send_first = false; /* we wait to be contacted */ ompi_communicator_t *newcomp=NULL; char port_name[MPI_MAX_PORT_NAME]; char *tmp_port; orte_rml_tag_t tag; - + bool non_mpi = false; + MEMCHECKER( memchecker_comm(comm); ); @@ -89,21 +91,37 @@ int MPI_Comm_spawn(char *command, char **argv, int maxprocs, MPI_Info info, } } + /* See if the info key "ompi_non_mpi" was set to true */ + ompi_info_get_bool(info, "ompi_non_mpi", &non_mpi, &flag); + OPAL_CR_ENTER_LIBRARY(); if ( rank == root ) { - /* Open a port. The port_name is passed as an environment variable - to the children. */ - ompi_open_port (port_name); - if (OMPI_SUCCESS != (rc = ompi_comm_start_processes (1, &command, &argv, &maxprocs, - &info, port_name))) { + if (non_mpi) { + /* no port is required since we won't be + * communicating with the children + */ + port_name[0] = '\0'; + } else { + /* Open a port. The port_name is passed as an environment + variable to the children. 
*/ + ompi_dpm.open_port (port_name); + } + if (OMPI_SUCCESS != (rc = ompi_dpm.spawn (1, &command, &argv, &maxprocs, + &info, port_name))) { goto error; } - tmp_port = ompi_parse_port (port_name, &tag); - free(tmp_port); + if (!non_mpi) { + tmp_port = ompi_dpm.parse_port (port_name, &tag); + free(tmp_port); + } + } + + if (non_mpi) { + newcomp = MPI_COMM_NULL; + } else { + rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, &newcomp, tag); } - - rc = ompi_comm_connect_accept (comm, root, NULL, send_first, &newcomp, tag); error: OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpi/c/comm_spawn_multiple.c b/ompi/mpi/c/comm_spawn_multiple.c index 7d1f8083d9..f8d618b7fe 100644 --- a/ompi/mpi/c/comm_spawn_multiple.c +++ b/ompi/mpi/c/comm_spawn_multiple.c @@ -22,6 +22,7 @@ #include "opal/util/show_help.h" #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" +#include "ompi/mca/dpm/dpm.h" #include "ompi/memchecker.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -40,12 +41,13 @@ int MPI_Comm_spawn_multiple(int count, char **array_of_commands, char ***array_o int root, MPI_Comm comm, MPI_Comm *intercomm, int *array_of_errcodes) { - int i=0, rc=0, rank=0; + int i=0, rc=0, rank=0, flag; ompi_communicator_t *newcomp=NULL; int send_first=0; /* they are contacting us first */ char port_name[MPI_MAX_PORT_NAME]; char *tmp_port; orte_rml_tag_t tag = 0; + bool non_mpi, cumulative = false; MEMCHECKER( memchecker_comm(comm); @@ -77,6 +79,24 @@ int MPI_Comm_spawn_multiple(int count, char **array_of_commands, char ***array_o return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO, FUNC_NAME); } + /* If ompi_non_mpi is set to true on any info, it must be + set to true on all of them. Note that not setting + ompi_non_mpi is the same as setting it to false. 
*/ + ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, + &flag); + if (flag && 0 == i) { + /* If this is the first info, save its ompi_non_mpi value */ + cumulative = non_mpi; + } else if (!flag) { + non_mpi = false; + } + /* If this info's effective value doesn't agree with the + rest of them, error */ + if (cumulative != non_mpi) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, + MPI_ERR_INFO, + FUNC_NAME); + } } } @@ -106,22 +126,43 @@ int MPI_Comm_spawn_multiple(int count, char **array_of_commands, char ***array_o } } + if (MPI_INFO_NULL == array_of_info[0]) { + non_mpi = false; + } else { + ompi_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi, + &flag); + if (!flag) { + non_mpi = false; + } + } + OPAL_CR_ENTER_LIBRARY(); if ( rank == root ) { - /* Open a port. The port_name is passed as an environment variable - * to the children. */ - ompi_open_port (port_name); - if (OMPI_SUCCESS != (rc = ompi_comm_start_processes(count, array_of_commands, - array_of_argv, array_of_maxprocs, - array_of_info, port_name))) { + if (non_mpi) { + /* RHC: should this be better? */ + port_name[0] = '\0'; + } else { + /* Open a port. The port_name is passed as an environment + variable to the children. 
*/ + ompi_dpm.open_port (port_name); + } + if (OMPI_SUCCESS != (rc = ompi_dpm.spawn(count, array_of_commands, + array_of_argv, array_of_maxprocs, + array_of_info, port_name))) { goto error; } - tmp_port = ompi_parse_port (port_name, &tag); - free(tmp_port); + if (!non_mpi) { + tmp_port = ompi_dpm.parse_port (port_name, &tag); + free(tmp_port); + } } - rc = ompi_comm_connect_accept (comm, root, NULL, send_first, &newcomp, tag); + if (non_mpi) { + newcomp = MPI_COMM_NULL; + } else { + rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, &newcomp, tag); + } error: OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpi/c/lookup_name.c b/ompi/mpi/c/lookup_name.c index ee966da911..59c9ec1a0c 100644 --- a/ompi/mpi/c/lookup_name.c +++ b/ompi/mpi/c/lookup_name.c @@ -21,6 +21,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" +#include "ompi/mca/pubsub/pubsub.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Lookup_name = PMPI_Lookup_name @@ -65,7 +66,7 @@ int MPI_Lookup_name(char *service_name, MPI_Info info, char *port_name) * if multiple entries found, this implementation uses * at the moment the first entry. 
*/ - tmp = (char *) ompi_comm_namelookup(service_name); + tmp = (char *) ompi_pubsub.lookup(service_name, info); if ( NULL == tmp ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NAME, FUNC_NAME); diff --git a/ompi/mpi/c/open_port.c b/ompi/mpi/c/open_port.c index 84de7e479c..c673e25de3 100644 --- a/ompi/mpi/c/open_port.c +++ b/ompi/mpi/c/open_port.c @@ -21,6 +21,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" #include "ompi/proc/proc.h" +#include "ompi/mca/dpm/dpm.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Open_port = PMPI_Open_port @@ -61,8 +62,9 @@ int MPI_Open_port(MPI_Info info, char *port_name) */ } + rc = ompi_dpm.open_port(port_name); + OPAL_CR_ENTER_LIBRARY(); - rc = ompi_open_port(port_name); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/publish_name.c b/ompi/mpi/c/publish_name.c index e1748c1733..1f62ebe5c3 100644 --- a/ompi/mpi/c/publish_name.c +++ b/ompi/mpi/c/publish_name.c @@ -21,6 +21,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" +#include "ompi/mca/pubsub/pubsub.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Publish_name = PMPI_Publish_name @@ -62,9 +63,18 @@ int MPI_Publish_name(char *service_name, MPI_Info info, * therefore, we do not parse the info-object at the moment. 
*/ - rc = ompi_comm_namepublish (service_name, port_name); + rc = ompi_pubsub.publish (service_name, info, port_name); OPAL_CR_EXIT_LIBRARY(); if ( OMPI_SUCCESS != rc ) { + if (OMPI_EXISTS == rc) { + /* already exists - can't publish it */ + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_FILE_EXISTS, + FUNC_NAME); + } + + /* none of the MPI-specific errors occurred - must be some + * kind of internal error + */ return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME); } diff --git a/ompi/mpi/c/unpublish_name.c b/ompi/mpi/c/unpublish_name.c index 8f6faabfd2..89f7195e7b 100644 --- a/ompi/mpi/c/unpublish_name.c +++ b/ompi/mpi/c/unpublish_name.c @@ -20,6 +20,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/info/info.h" +#include "ompi/mca/pubsub/pubsub.h" #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Unpublish_name = PMPI_Unpublish_name @@ -60,10 +61,26 @@ int MPI_Unpublish_name(char *service_name, MPI_Info info, * No predefined info-objects for this function in MPI-2, * therefore, we do not parse the info-object at the moment. 
*/ - rc = ompi_comm_nameunpublish(service_name); + rc = ompi_pubsub.unpublish(service_name, info); if ( OMPI_SUCCESS != rc ) { + if (OMPI_ERR_NOT_FOUND == rc) { + /* service couldn't be found */ + OPAL_CR_EXIT_LIBRARY(); + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_SERVICE, + FUNC_NAME); + } + if (OMPI_ERR_PERM == rc) { + /* this process didn't own the specified service */ + OPAL_CR_EXIT_LIBRARY(); + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ACCESS, + FUNC_NAME); + } + + /* none of the MPI-specific errors occurred - must be some + * kind of internal error + */ OPAL_CR_EXIT_LIBRARY(); - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_SERVICE, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME); } diff --git a/ompi/mpi/man/man3/MPI_Comm_spawn.3 b/ompi/mpi/man/man3/MPI_Comm_spawn.3 index c7ad02d45f..c8398eeb31 100644 --- a/ompi/mpi/man/man3/MPI_Comm_spawn.3 +++ b/ompi/mpi/man/man3/MPI_Comm_spawn.3 @@ -95,15 +95,28 @@ The \fIinfo\fP argument is an opaque handle of type MPI_Info in C, MPI::Info in .sp For the SPAWN calls, \fIinfo\fP provides additional, implementation-dependent instructions to MPI and the runtime system on how to start processes. An application may pass MPI_INFO_NULL in C or Fortran. Portable programs not requiring detailed control over process locations should use MPI_INFO_NULL. .sp -The following values for \fIinfo\fP are recognized in Open MPI. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.) +The following keys for \fIinfo\fP are recognized in Open MPI. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.) .sp .nf -Key value Type Description ----------- ---- ----------- +Key Type Description +--- ---- ----------- host char * Host on which the process should be spawned. wdir char * Directory where the executable is located. +ompi_prefix char * Same as the --prefix command line argument + to mpirun. 
+ompi_non_mpi bool If set to true, launching a non-MPI + application; the returned communicator + will be MPI_COMM_NULL. .fi + +\fIbool\fP info keys are actually strings but are evaluated as +follows: if the string value is a number, it is converted to an +integer and cast to a boolean (meaning that zero integers are false +and non-zero values are true). If the string value is +(case-insensitive) "yes" or "true", the boolean is true. If the +string value is (case-insensitive) "no" or "false", the boolean is +false. All other string values are unrecognized, and therefore false. .sp The \fIroot\fP Argument diff --git a/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3 b/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3 index 4da5a9c5de..3e2b62427e 100644 --- a/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3 +++ b/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3 @@ -105,17 +105,37 @@ Error codes are treated as for MPI_Comm_spawn. .SH INFO ARGUMENTS -The following values for \fIinfo\fP are recognized in Open MPI 1.2. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.) +The following keys for \fIinfo\fP are recognized in Open MPI 1.2. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.) .sp .sp .nf -Key value Type Description ----------- ---- ----------- +Key Type Description +--- ---- ----------- host char * Host on which the process should be spawned. wdir char * Directory where the executable is located. +ompi_prefix char * Same as the --prefix command line argument + to mpirun. +ompi_non_mpi bool If set to true, launching a non-MPI + application; the returned communicator + will be MPI_COMM_NULL. .fi - + +.sp +\fIbool\fP info keys are actually strings but are evaluated as +follows: if the string value is a number, it is converted to an +integer and cast to a boolean (meaning that zero integers are false +and non-zero values are true). 
If the string value is +(case-insensitive) "yes" or "true", the boolean is true. If the +string value is (case-insensitive) "no" or "false", the boolean is +false. All other string values are unrecognized, and therefore false. + +.sp +Note that if any of the info handles have \fIompi_non_mpi\fP set to +true, then all info handles must have it set to true. If some are set +to true, but others are set to false (or are unset), MPI_ERR_INFO will +be returned. + .sp Note that in Open MPI 1.2, the first array location in \fIarray_of_info\fP is applied to all the commands in \fIarray_of_commands\fP. diff --git a/ompi/mpi/man/man3/MPI_Lookup_name.3 b/ompi/mpi/man/man3/MPI_Lookup_name.3 index 57eb070e0d..5e70ef1aea 100644 --- a/ompi/mpi/man/man3/MPI_Lookup_name.3 +++ b/ompi/mpi/man/man3/MPI_Lookup_name.3 @@ -35,7 +35,7 @@ service_name A service name (string). .TP 1.4i info -Options to the name service functions (handle). No options currently supported. +Options to the name service functions (handle). .SH OUTPUT PARAMETERS .ft R @@ -52,9 +52,37 @@ This function retrieves a \fIport_name\fP published under \fIservice_name\fP by a previous invocation of MPI_Publish_name. The application must supply a \fIport_name\fP buffer large enough to hold the largest possible port name (i.e., MPI_MAX_PORT_NAME bytes). + +.SH INFO ARGUMENTS +The following keys for \fIinfo\fP are recognized: .sp -The \fIinfo\fP parameter should be MPI_INFO_NULL, as this routine does -not parse any MPI Info arguments. +.sp +.nf +Key Type Description +--- ---- ----------- + +ompi_lookup_order char * Resolution order for name lookup. +.fi + +The \fIompi_lookup_order\fP info key can specify one of four valid +string values (see the NAME SCOPE section below for more information +on name scopes): + +.TP 4 +\fIlocal\fP: Only search the local scope for name resolution. +.TP 4 +\fIglobal\fP: Only search the global scope for name resolution. +.TP 4 +\fIlocal,global\fP: Search the local scope for name resolution. 
If +not found, try searching the global scope for name resolution. This +behavior is the default if the \fIompi_lookup_order\fP info key is not +specified. +.TP 4 +\fIglobal,local\fP: Search the global scope for name resolution. If +not found, try searching the local scope for name resolution. + +.SH NAME SCOPE +RHC fill in here :-) .SH ERRORS .ft R diff --git a/ompi/mpi/man/man3/MPI_Publish_name.3 b/ompi/mpi/man/man3/MPI_Publish_name.3 index 77741ee920..84f32f8ce2 100644 --- a/ompi/mpi/man/man3/MPI_Publish_name.3 +++ b/ompi/mpi/man/man3/MPI_Publish_name.3 @@ -35,7 +35,7 @@ service_name A service name (string). .TP 1.4i info -Options to the name service functions (handle). No options currently supported. +Options to the name service functions (handle). .ft R .TP 1.4i port_name @@ -53,9 +53,32 @@ an application may retrieve \fIport_name\fP by calling MPI_Lookup_name with \fIservice_name\fP as an argument. It is an error to publish the same \fIservice_name\fP twice, or to use a \fIport_name\fP argument that was not previously opened by the calling process via a call to MPI_Open_port. + +.SH INFO ARGUMENTS +The following keys for \fIinfo\fP are recognized: .sp -The \fIinfo\fP parameter should be MPI_INFO_NULL, as this routine does -not parse any MPI Info arguments. +.sp +.nf +Key Type Description +--- ---- ----------- + +ompi_global_scope bool If set to true, publish the name in + the global scope. Publish in the local + scope otherwise. See the NAME SCOPE + section for more details. +.fi + +.sp +\fIbool\fP info keys are actually strings but are evaluated as +follows: if the string value is a number, it is converted to an +integer and cast to a boolean (meaning that zero integers are false +and non-zero values are true). If the string value is +(case-insensitive) "yes" or "true", the boolean is true. If the +string value is (case-insensitive) "no" or "false", the boolean is +false. All other string values are unrecognized, and therefore false. 
+ +.SH NAME SCOPE +RHC fill in here :-) .SH ERRORS .ft R diff --git a/ompi/mpi/man/man3/MPI_Unpublish_name.3 b/ompi/mpi/man/man3/MPI_Unpublish_name.3 index 6947655746..68072161fd 100644 --- a/ompi/mpi/man/man3/MPI_Unpublish_name.3 +++ b/ompi/mpi/man/man3/MPI_Unpublish_name.3 @@ -35,7 +35,7 @@ service_name A service name (string). .TP 1.4i info -Options to the name service functions (handle). No options currently supported. +Options to the name service functions (handle). .ft R .TP 1.4i port_name @@ -54,9 +54,33 @@ MPI_Lookup_name. It is an error to unpublish a \fIservice_name\fP that was not published via MPI_Publish_name. Both the \fIservice_name\fP and \fIport_name\fP arguments to MPI_Unpublish_name must be identical to the arguments to the previous call to MPI_Publish_name. + +.SH INFO ARGUMENTS +The following keys for \fIinfo\fP are recognized: .sp -The \fIinfo\fP parameter should be MPI_INFO_NULL, as this routine does -not parse any MPI Info arguments. +.sp +.nf +Key Type Description +--- ---- ----------- + +ompi_global_scope bool If set to true, unpublish the name from + the global scope. Unpublish from the local + scope otherwise. See the NAME SCOPE + section for more details. + +.fi + +.sp +\fIbool\fP info keys are actually strings but are evaluated as +follows: if the string value is a number, it is converted to an +integer and cast to a boolean (meaning that zero integers are false +and non-zero values are true). If the string value is +(case-insensitive) "yes" or "true", the boolean is true. If the +string value is (case-insensitive) "no" or "false", the boolean is +false. All other string values are unrecognized, and therefore false. 
+ +.SH NAME SCOPE +RHC fill in here :-) .SH ERRORS .ft R diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index dcfe05f7e0..e2a9d67bbf 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -24,12 +24,14 @@ #include "opal/threads/mutex.h" #include "opal/util/output.h" #include "opal/util/show_help.h" + #include "orte/util/sys_info.h" -#include "orte/dss/dss.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" +#include "opal/dss/dss.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/proc_info.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "ompi/proc/proc.h" #include "ompi/mca/pml/pml.h" #include "ompi/datatype/dt_arch.h" @@ -57,7 +59,6 @@ void ompi_proc_construct(ompi_proc_t* proc) { proc->proc_bml = NULL; proc->proc_pml = NULL; - proc->proc_modex = NULL; OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t); /* By default all processors are supposelly having the same architecture as me. Thus, @@ -82,9 +83,6 @@ void ompi_proc_construct(ompi_proc_t* proc) void ompi_proc_destruct(ompi_proc_t* proc) { - if (proc->proc_modex != NULL) { - OBJ_RELEASE(proc->proc_modex); - } /* As all the convertors are created with OBJ_NEW we can just call OBJ_RELEASE. All, except * the local convertor, will get destroyed at some point here. 
If the reference count is correct * the local convertor (who has the reference count increased in the datatype) will not get @@ -103,28 +101,23 @@ void ompi_proc_destruct(ompi_proc_t* proc) int ompi_proc_init(void) { - orte_process_name_t *peers; - orte_std_cntr_t i, npeers; + orte_vpid_t i; int rc; uint32_t ui32; OBJ_CONSTRUCT(&ompi_proc_list, opal_list_t); OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t); - /* create a shell of a proc structure for every proc in MPI_COMM_WORLD */ - if(ORTE_SUCCESS != (rc = orte_ns.get_peers(&peers, &npeers, NULL))) { - opal_output(0, "ompi_proc_init: get_peers failed with errno=%d", rc); - return rc; - } - for( i = 0; i < npeers; i++ ) { + /* create proc structures and find self */ + for( i = 0; i < orte_process_info.num_procs; i++ ) { ompi_proc_t *proc = OBJ_NEW(ompi_proc_t); - proc->proc_name = peers[i]; + proc->proc_name.jobid = ORTE_PROC_MY_NAME->jobid; + proc->proc_name.vpid = i; if( i == ORTE_PROC_MY_NAME->vpid ) { ompi_proc_local_proc = proc; proc->proc_flags |= OMPI_PROC_FLAG_LOCAL; } } - free(peers); /* Fill in our local information */ rc = ompi_arch_compute_local_id(&ui32); @@ -157,11 +150,11 @@ int ompi_proc_publish_info(void) { orte_std_cntr_t datalen; void *data; - orte_buffer_t* buf; + opal_buffer_t* buf; int rc; /* pack our local data for others to use */ - buf = OBJ_NEW(orte_buffer_t); + buf = OBJ_NEW(opal_buffer_t); rc = ompi_proc_pack(&ompi_proc_local_proc, 1, buf); if (OMPI_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -169,7 +162,7 @@ int ompi_proc_publish_info(void) } /* send our data into the ether */ - rc = orte_dss.unload(buf, &data, &datalen); + rc = opal_dss.unload(buf, &data, &datalen); if (OMPI_SUCCESS != rc) return rc; OBJ_RELEASE(buf); @@ -197,9 +190,9 @@ ompi_proc_get_info(void) char *hostname; void *data; size_t datalen; - orte_nodeid_t nodeid; + orte_vpid_t nodeid; - if (ORTE_EQUAL != orte_ns.compare_fields(ORTE_NS_CMP_JOBID, + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_JOBID, 
&ompi_proc_local_proc->proc_name, &proc->proc_name)) { /* not in our jobid -- this shouldn't happen */ @@ -209,12 +202,12 @@ ompi_proc_get_info(void) ret = ompi_modex_recv_string("ompi-proc-info", proc, &data, &datalen); if (OMPI_SUCCESS == ret) { - orte_buffer_t *buf; + opal_buffer_t *buf; orte_std_cntr_t count=1; orte_process_name_t name; - buf = OBJ_NEW(orte_buffer_t); - ret = orte_dss.load(buf, data, datalen); + buf = OBJ_NEW(opal_buffer_t); + ret = opal_dss.load(buf, data, datalen); if (OMPI_SUCCESS != ret) goto out; @@ -222,23 +215,23 @@ ompi_proc_get_info(void) could, in theory, use the unpack code on this proc. We don't,because we aren't adding procs, but need to update them */ - ret = orte_dss.unpack(buf, &name, &count, ORTE_NAME); + ret = opal_dss.unpack(buf, &name, &count, ORTE_NAME); if (ret != ORTE_SUCCESS) goto out; - ret = orte_dss.unpack(buf, &nodeid, &count, ORTE_NODEID); + ret = opal_dss.unpack(buf, &nodeid, &count, ORTE_VPID); if (ret != ORTE_SUCCESS) { ORTE_ERROR_LOG(ret); goto out; } - ret = orte_dss.unpack(buf, &arch, &count, ORTE_UINT32); + ret = opal_dss.unpack(buf, &arch, &count, OPAL_UINT32); if (ret != ORTE_SUCCESS) { ORTE_ERROR_LOG(ret); goto out; } - ret = orte_dss.unpack(buf, &hostname, &count, ORTE_STRING); + ret = opal_dss.unpack(buf, &hostname, &count, OPAL_STRING); if (ret != ORTE_SUCCESS) { ORTE_ERROR_LOG(ret); goto out; @@ -324,7 +317,7 @@ ompi_proc_t** ompi_proc_world(size_t *size) for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); proc = (ompi_proc_t*)opal_list_get_next(proc)) { - if (ORTE_EQUAL == orte_ns.compare_fields(mask, &proc->proc_name, &my_name)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, &my_name)) { ++count; } } @@ -340,7 +333,7 @@ ompi_proc_t** ompi_proc_world(size_t *size) for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); proc = 
(ompi_proc_t*)opal_list_get_next(proc)) { - if (ORTE_EQUAL == orte_ns.compare_fields(mask, &proc->proc_name, &my_name)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, &my_name)) { procs[count++] = proc; } } @@ -398,7 +391,7 @@ ompi_proc_t * ompi_proc_find ( const orte_process_name_t * name ) for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); proc = (ompi_proc_t*)opal_list_get_next(proc)) { - if (ORTE_EQUAL == orte_ns.compare_fields(mask, &proc->proc_name, name)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, name)) { rproc = proc; break; } @@ -421,7 +414,7 @@ ompi_proc_find_and_add(const orte_process_name_t * name, bool* isnew) for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); proc = (ompi_proc_t*)opal_list_get_next(proc)) { - if (ORTE_EQUAL == orte_ns.compare_fields(mask, &proc->proc_name, name)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, name)) { rproc = proc; *isnew = false; break; @@ -445,31 +438,31 @@ ompi_proc_find_and_add(const orte_process_name_t * name, bool* isnew) int -ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, orte_buffer_t* buf) +ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, opal_buffer_t* buf) { int i, rc; OPAL_THREAD_LOCK(&ompi_proc_lock); for (i=0; iproc_name), 1, ORTE_NAME); + rc = opal_dss.pack(buf, &(proclist[i]->proc_name), 1, ORTE_NAME); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); OPAL_THREAD_UNLOCK(&ompi_proc_lock); return rc; } - rc = orte_dss.pack(buf, &(proclist[i]->proc_nodeid), 1, ORTE_NODEID); + rc = opal_dss.pack(buf, &(proclist[i]->proc_nodeid), 1, ORTE_VPID); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); OPAL_THREAD_UNLOCK(&ompi_proc_lock); return rc; } - rc = orte_dss.pack(buf, &(proclist[i]->proc_arch), 1, ORTE_UINT32); + rc = opal_dss.pack(buf, &(proclist[i]->proc_arch), 1, 
OPAL_UINT32); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); OPAL_THREAD_UNLOCK(&ompi_proc_lock); return rc; } - rc = orte_dss.pack(buf, &(proclist[i]->proc_hostname), 1, ORTE_STRING); + rc = opal_dss.pack(buf, &(proclist[i]->proc_hostname), 1, OPAL_STRING); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); OPAL_THREAD_UNLOCK(&ompi_proc_lock); @@ -482,7 +475,7 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, orte_buffer_t* buf) int -ompi_proc_unpack(orte_buffer_t* buf, +ompi_proc_unpack(opal_buffer_t* buf, int proclistsize, ompi_proc_t ***proclist, int *newproclistsize, ompi_proc_t ***newproclist) { @@ -509,24 +502,24 @@ ompi_proc_unpack(orte_buffer_t* buf, char *new_hostname; bool isnew = false; int rc; - orte_nodeid_t new_nodeid; + orte_vpid_t new_nodeid; - rc = orte_dss.unpack(buf, &new_name, &count, ORTE_NAME); + rc = opal_dss.unpack(buf, &new_name, &count, ORTE_NAME); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_dss.unpack(buf, &new_nodeid, &count, ORTE_NODEID); + rc = opal_dss.unpack(buf, &new_nodeid, &count, ORTE_VPID); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_dss.unpack(buf, &new_arch, &count, ORTE_UINT32); + rc = opal_dss.unpack(buf, &new_arch, &count, OPAL_UINT32); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_dss.unpack(buf, &new_hostname, &count, ORTE_STRING); + rc = opal_dss.unpack(buf, &new_hostname, &count, OPAL_STRING); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; @@ -572,3 +565,59 @@ ompi_proc_unpack(orte_buffer_t* buf, *proclist = plist; return OMPI_SUCCESS; } + +int ompi_proc_refresh(void) { + ompi_proc_t *proc = NULL; + opal_list_item_t *item = NULL; + orte_vpid_t i = 0; + int rc; + uint32_t ui32; + + OPAL_THREAD_LOCK(&ompi_proc_lock); + + for( item = opal_list_get_first(&ompi_proc_list), i = 0; + item != opal_list_get_end(&ompi_proc_list); + item = opal_list_get_next(item), ++i ) { + proc = (ompi_proc_t*)item; + + /* Does not change: 
orte_process_info.num_procs */ + /* Does not change: proc->proc_name.vpid */ + proc->proc_name.jobid = ORTE_PROC_MY_NAME->jobid; + if( i == ORTE_PROC_MY_NAME->vpid ) { + ompi_proc_local_proc = proc; + proc->proc_flags |= OMPI_PROC_FLAG_LOCAL; + } else { + proc->proc_flags = 0; + } + } + + /* Fill in our local information */ + rc = ompi_arch_compute_local_id(&ui32); + if (OMPI_SUCCESS != rc) { + return rc; + } + + ompi_proc_local_proc->proc_nodeid = orte_system_info.nodeid; + ompi_proc_local_proc->proc_arch = ui32; + if (ompi_mpi_keep_peer_hostnames) { + if (ompi_mpi_keep_fqdn_hostnames) { + /* use the entire FQDN name */ + ompi_proc_local_proc->proc_hostname = strdup(orte_system_info.nodename); + } else { + /* use the unqualified name */ + char *tmp, *ptr; + tmp = strdup(orte_system_info.nodename); + if (NULL != (ptr = strchr(tmp, '.'))) { + *ptr = '\0'; + } + ompi_proc_local_proc->proc_hostname = strdup(tmp); + free(tmp); + } + } + + rc = ompi_proc_publish_info(); + + OPAL_THREAD_UNLOCK(&ompi_proc_lock); + + return rc; +} diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h index f148ef4376..8bf52a83ee 100644 --- a/ompi/proc/proc.h +++ b/ompi/proc/proc.h @@ -33,10 +33,10 @@ #include "ompi/types.h" #include "opal/class/opal_list.h" -#include "orte/dss/dss_types.h" #include "opal/threads/mutex.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" +#include "opal/dss/dss_types.h" BEGIN_C_DECLS @@ -54,14 +54,12 @@ struct ompi_proc_t { opal_list_item_t super; /** this process' name */ orte_process_name_t proc_name; - /** "nodeid" on which the proc resides */ - orte_nodeid_t proc_nodeid; + /** "nodeid" on which the proc resides - equiv to vpid of local daemon */ + orte_vpid_t proc_nodeid; /** PML specific proc data */ struct mca_pml_base_endpoint_t* proc_pml; /** BML specific proc data */ struct mca_bml_base_endpoint_t* proc_bml; - /** MCA module exchange data */ - opal_object_t* proc_modex; /** architecture of this process */ uint32_t proc_arch; /** Base convertor 
/**
 * Refresh the OMPI process subsystem
 *
 * Refresh the Open MPI process subsystem.  This function will update
 * the list of proc instances in the current MPI_COMM_WORLD with
 * data from the run-time environment.
 *
 * @note This is primarily used when restarting a process, where the
 * jobid and node name need to be updated.
 *
 * @retval OMPI_SUCCESS System successfully refreshed
 * @retval OMPI_ERROR   Refresh failed due to unspecified error
 */
int ompi_proc_refresh(void);
MODEX DESIGN - * - * Modex data is always associated with a given orte process name, in - * an orte hash table. A backpointer is kept on an ompi_proc_t for - * fast access. The hash table is necessary because modex data is - * received from the GPR for entire jobids and when working with - * dynamic processes, it is possible we will receive data for a - * process not yet in the ompi_proc_all() list of process. This - * information must be kept for later use, because if accept/connect - * causes the proc to be added to the ompi_proc_all() list, the - * subscription to the mdoex information can not be reliably fired - * without causing a potential connection storm. Therefore, we use an - * orte_proc_table backing store to contain all modex information. - * Backpointers are provided from the ompi_proc_t structure to improve - * lookup performance in the common case. - * - * While we could add the now discovered proc into the ompi_proc_all() - * list, this has some problems, in that we don't have the - * architecture and hostname information needed to properly fill in - * the ompi_proc_t structure and we don't want to cause GPR - * communication to get it when we dont' really need to know anything - * about the remote proc. - * - * All data put into the modex (or received from the modex) is - * associated with a given proc,component pair. 
The data structures - * to maintain this data look something like: - * - * orte_hash_table_t ompi_modex_data -> list of ompi_modex_proc_t objects - * - * +-----------------------------+ - * | ompi_modex_proc_data_t | - * | - opal_list_item_t | - * +-----------------------------+ - * | opal_mutex_t modex_lock | - * | opal_condition_t modex_cond | - * | bool modex_received_data | 1 - * | opal_list_t modules | ---------+ - * +-----------------------------+ | - * * | - * +--------------------------------+ <--------+ - * | ompi_modex_module_data_t | - * | - opal_list_item_t | - * +--------------------------------+ - * | mca_base_component_t component | - * | void *module_data | - * | size_t module_data_size | 1 - * | opal_list_t module_cbs | ---------+ - * +--------------------------------+ | - * * | - * +---------------------------+ <--------+ - * | ompi_modex_cb_t | - * | - opal_list_item_t | - * +---------------------------+ - * | ompi_modex_cb_fn_t cbfunc | - * | void *cbdata | - * +---------------------------+ - * - * In order to maintain subscriptions to the registry for modex - * information, a list of all active subscriptions is maintained as a - * list (ompi_modex_subscriptions) of ompi_modex_subscription_t - * structures. The structure contains the jobid used in the - * subscription. - */ - - -/** - * Modex data for a particular orte process - * - * Locking infrastructure and list of module data for a given orte - * process name. The name association is maintained in the - * ompi_modex_proc_list hash table. - */ -struct ompi_modex_proc_data_t { - /** Structure can be put on lists (including in hash tables) */ - opal_list_item_t super; - /* Lock held whenever the modex data for this proc is being - modified */ - opal_mutex_t modex_lock; - /* Condition variable used when blocking on data from this - process. Should be signalled whenever data is updated for this - process. 
*/ - opal_condition_t modex_cond; - /* True if modex data has ever been received from this process, - false otherwise. */ - bool modex_received_data; - /* List of ompi_modex_module_data_t structures containing all data - received from this process, sorted by component name. */ - opal_list_t modex_module_data; -}; -typedef struct ompi_modex_proc_data_t ompi_modex_proc_data_t; - -static void -ompi_modex_construct(ompi_modex_proc_data_t * modex) -{ - OBJ_CONSTRUCT(&modex->modex_lock, opal_mutex_t); - OBJ_CONSTRUCT(&modex->modex_cond, opal_condition_t); - modex->modex_received_data = false; - OBJ_CONSTRUCT(&modex->modex_module_data, opal_list_t); -} - -static void -ompi_modex_destruct(ompi_modex_proc_data_t * modex) -{ - OBJ_DESTRUCT(&modex->modex_module_data); - OBJ_DESTRUCT(&modex->modex_cond); - OBJ_DESTRUCT(&modex->modex_lock); -} - -OBJ_CLASS_INSTANCE(ompi_modex_proc_data_t, opal_object_t, - ompi_modex_construct, ompi_modex_destruct); - - - -/** - * Modex data for a particular component name - * - * Container for data for a particular proc,component pair. This - * structure should be contained in the modules list in an - * ompi_modex_proc_data_t structure to maintain an association with a - * given proc. The list is then searched for a matching component - * name. - * - * While searching the list or reading from (or writing to) this - * structure, the lock in the proc_data_t should be held. - */ -struct ompi_modex_module_data_t { - /** Structure can be put on lists */ - opal_list_item_t super; - /** Component information for this data */ - mca_base_component_t component; - /** Binary blob of data associated with this proc,component pair */ - void *module_data; - /** Size (in bytes) of module_data */ - size_t module_data_size; - /** callbacks that should be fired when module_data changes. 
*/ - opal_list_t module_cbs; -}; -typedef struct ompi_modex_module_data_t ompi_modex_module_data_t; - -static void -ompi_modex_module_construct(ompi_modex_module_data_t * module) -{ - memset(&module->component, 0, sizeof(module->component)); - module->module_data = NULL; - module->module_data_size = 0; - OBJ_CONSTRUCT(&module->module_cbs, opal_list_t); -} - -static void -ompi_modex_module_destruct(ompi_modex_module_data_t * module) -{ - opal_list_item_t *item; - while (NULL != (item = opal_list_remove_first(&module->module_cbs))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&module->module_cbs); -} - -OBJ_CLASS_INSTANCE(ompi_modex_module_data_t, - opal_list_item_t, - ompi_modex_module_construct, - ompi_modex_module_destruct); - -/** - * Callback data for modex updates - * - * Data container for update callbacks that should be fired whenever a - * given proc,component pair has a modex data update. - */ -struct ompi_modex_cb_t { - opal_list_item_t super; - ompi_modex_cb_fn_t cbfunc; - void *cbdata; -}; -typedef struct ompi_modex_cb_t ompi_modex_cb_t; - -OBJ_CLASS_INSTANCE(ompi_modex_cb_t, - opal_list_item_t, - NULL, - NULL); - - - -/** - * Global modex list of proc data - * - * Global bhash table associating orte_process_name_t values with an - * ompi_modex_proc_data_t container. - * - * \note The ompi_modex_lock mutex should be held whenever this list - * is being updated or searched. - */ -static opal_hash_table_t ompi_modex_data; - -/** - * Global modex lock - * - * Global lock for modex usage, particularily protecting the - * ompi_modex_subscriptions list and the ompi_modex_data hash table. 
- */ -static opal_mutex_t ompi_modex_lock; - -static opal_mutex_t ompi_modex_string_lock; - -/* - * Global buffer we use to collect modex info for later - * transmission - */ -static orte_buffer_t ompi_modex_buffer; -static orte_std_cntr_t ompi_modex_num_entries; - - -int -ompi_modex_init(void) -{ - OBJ_CONSTRUCT(&ompi_modex_data, opal_hash_table_t); - OBJ_CONSTRUCT(&ompi_modex_lock, opal_mutex_t); - OBJ_CONSTRUCT(&ompi_modex_string_lock, opal_mutex_t); - - OBJ_CONSTRUCT(&ompi_modex_buffer, orte_buffer_t); - ompi_modex_num_entries = 0; - - opal_hash_table_init(&ompi_modex_data, 256); - - return OMPI_SUCCESS; -} - - -int -ompi_modex_finalize(void) -{ - opal_hash_table_remove_all(&ompi_modex_data); - OBJ_DESTRUCT(&ompi_modex_data); - - OBJ_DESTRUCT(&ompi_modex_string_lock); - OBJ_DESTRUCT(&ompi_modex_lock); - OBJ_DESTRUCT(&ompi_modex_buffer); - - return OMPI_SUCCESS; -} - - -/** - * Find data for a given component in a given modex_proc_data_t - * container. - * - * Find data for a given component in a given modex_proc_data_t - * container. The proc_data's modex_lock must be held during this - * search. 
- */ -static ompi_modex_module_data_t * -ompi_modex_lookup_module(ompi_modex_proc_data_t *proc_data, - mca_base_component_t *component, - bool create_if_not_found) -{ - ompi_modex_module_data_t *module_data = NULL; - for (module_data = (ompi_modex_module_data_t *) opal_list_get_first(&proc_data->modex_module_data); - module_data != (ompi_modex_module_data_t *) opal_list_get_end(&proc_data->modex_module_data); - module_data = (ompi_modex_module_data_t *) opal_list_get_next(module_data)) { - if (mca_base_component_compatible(&module_data->component, component) == 0) { - return module_data; - } - } - - if (create_if_not_found) { - module_data = OBJ_NEW(ompi_modex_module_data_t); - if (NULL == module_data) return NULL; - - memcpy(&module_data->component, component, sizeof(mca_base_component_t)); - opal_list_append(&proc_data->modex_module_data, &module_data->super); - - return module_data; - } - - return NULL; -} - - -/** - * Find ompi_modex_proc_data_t container associated with given - * orte_process_name_t. - * - * Find ompi_modex_proc_data_t container associated with given - * orte_process_name_t. The global lock should *NOT* be held when - * calling this function. 
- */ -static ompi_modex_proc_data_t* -ompi_modex_lookup_orte_proc(orte_process_name_t *orte_proc) -{ - ompi_modex_proc_data_t *proc_data; - - OPAL_THREAD_LOCK(&ompi_modex_lock); - proc_data = (ompi_modex_proc_data_t*) - orte_hash_table_get_proc(&ompi_modex_data, orte_proc); - if (NULL == proc_data) { - /* The proc clearly exists, so create a modex structure - for it */ - proc_data = OBJ_NEW(ompi_modex_proc_data_t); - if (NULL == proc_data) { - opal_output(0, "ompi_modex_lookup_orte_proc: unable to allocate ompi_modex_proc_data_t\n"); - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return NULL; - } - orte_hash_table_set_proc(&ompi_modex_data, orte_proc, proc_data); - } - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - - return proc_data; -} - - -/** - * Find ompi_modex_proc_data_t container associated with given ompi_proc_t - * - * Find ompi_modex_proc_data_t container associated with given - * ompi_proc_t. The global lock should *NOT* be held when calling - * this function. - */ -static ompi_modex_proc_data_t* -ompi_modex_lookup_proc(ompi_proc_t *proc) -{ - ompi_modex_proc_data_t *proc_data = - (ompi_modex_proc_data_t *) proc->proc_modex; - - if (NULL == proc_data) { - proc_data = ompi_modex_lookup_orte_proc(&proc->proc_name); - if (NULL == proc_data) return NULL; - - /* set the association with the ompi_proc, if not already done. 
*/ - OPAL_THREAD_LOCK(&ompi_modex_lock); - if (NULL == proc->proc_modex) { - OBJ_RETAIN(proc_data); - proc->proc_modex = &proc_data->super.super; - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - } else { - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - } - } - - return proc_data; -} - - -/** - * Get the local buffer's data - */ -int -ompi_modex_get_my_buffer(orte_buffer_t *buf) -{ - int rc; - - OPAL_THREAD_LOCK(&ompi_modex_lock); - /* put our process name in the buffer so it can be unpacked later */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return rc; - } - - /* put the number of entries into the buffer */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &ompi_modex_num_entries, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return rc; - } - - /* if there are entries, copy the data across */ - if (0 < ompi_modex_num_entries) { - if (ORTE_SUCCESS != (orte_dss.copy_payload(buf, &ompi_modex_buffer))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return rc; - } - } - - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - return ORTE_SUCCESS; -} - -/** - * Process modex data - */ -int -ompi_modex_process_data(orte_buffer_t *buf) -{ - orte_std_cntr_t i, j, num_procs, num_entries; - opal_list_item_t *item; - void *bytes = NULL; - orte_std_cntr_t cnt; - orte_process_name_t proc_name; - ompi_modex_proc_data_t *proc_data; - ompi_modex_module_data_t *module_data; - mca_base_component_t component; - int rc; - - /* extract the number of entries in the buffer */ - cnt=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &num_procs, &cnt, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* process the buffer */ - for (i=0; i < num_procs; i++) { - /* unpack the process name */ - cnt=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &proc_name, &cnt, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* look up the modex data 
structure */ - proc_data = ompi_modex_lookup_orte_proc(&proc_name); - if (proc_data == NULL) { - /* report the error */ - opal_output(0, "ompi_modex_process_data: received modex info for unknown proc %s\n", - ORTE_NAME_PRINT(&proc_name)); - return OMPI_ERR_NOT_FOUND; - } - - /* unpack the number of entries for this proc */ - cnt=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &num_entries, &cnt, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - OPAL_THREAD_LOCK(&proc_data->modex_lock); - - /* - * Extract the component name and version - since there is one for each - * component type/name/version - process them all - */ - for (j = 0; j < num_entries; j++) { - size_t num_bytes; - char *ptr; - - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &ptr, &cnt, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - strcpy(component.mca_type_name, ptr); - free(ptr); - - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &ptr, &cnt, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - strcpy(component.mca_component_name, ptr); - free(ptr); - - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, - &component.mca_component_major_version, &cnt, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, - &component.mca_component_minor_version, &cnt, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &num_bytes, &cnt, ORTE_SIZE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (num_bytes != 0) { - if (NULL == (bytes = malloc(num_bytes))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - cnt = (orte_std_cntr_t) num_bytes; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, bytes, &cnt, ORTE_BYTE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - num_bytes = cnt; - } else { - bytes = NULL; - } - - /* - * Lookup the corresponding modex structure - */ - if (NULL == (module_data = 
ompi_modex_lookup_module(proc_data, - &component, - true))) { - opal_output(0, "ompi_modex_process_data: ompi_modex_lookup_module failed\n"); - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - return OMPI_ERR_NOT_FOUND; - } - module_data->module_data = bytes; - module_data->module_data_size = num_bytes; - proc_data->modex_received_data = true; - opal_condition_signal(&proc_data->modex_cond); - - if (opal_list_get_size(&module_data->module_cbs)) { - ompi_proc_t *proc = ompi_proc_find(&proc_name); - - if (NULL != proc) { - OPAL_THREAD_LOCK(&proc->proc_lock); - /* call any registered callbacks */ - for (item = opal_list_get_first(&module_data->module_cbs); - item != opal_list_get_end(&module_data->module_cbs); - item = opal_list_get_next(item)) { - ompi_modex_cb_t *cb = (ompi_modex_cb_t *) item; - cb->cbfunc(&module_data->component, - proc, bytes, num_bytes, cb->cbdata); - } - OPAL_THREAD_UNLOCK(&proc->proc_lock); - } - } - } - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - } - - return OMPI_SUCCESS; -} - - int ompi_modex_send(mca_base_component_t * source_component, - const void *data, - size_t size) + const void *data, size_t size) { int rc; - char *ptr; - - OPAL_THREAD_LOCK(&ompi_modex_lock); + char * name = mca_base_component_to_string(source_component); - /* Pack the component name information into the local buffer */ - ptr = source_component->mca_type_name; - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &ptr, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - ptr = source_component->mca_component_name; - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &ptr, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &source_component->mca_component_major_version, 1, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &source_component->mca_component_minor_version, 1, ORTE_INT32))) { - 
ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, &size, 1, ORTE_SIZE))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* Pack the actual data into the buffer */ - if (0 != size) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(&ompi_modex_buffer, (void *) data, size, ORTE_BYTE))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } + if(NULL == name) { + return OMPI_ERR_OUT_OF_RESOURCE; } - /* track the number of entries */ - ++ompi_modex_num_entries; - - cleanup: - OPAL_THREAD_UNLOCK(&ompi_modex_lock); - + rc = orte_grpcomm.set_proc_attr(name, data, size); + free(name); return rc; } @@ -625,108 +53,23 @@ ompi_modex_recv(mca_base_component_t * component, void **buffer, size_t * size) { - ompi_modex_proc_data_t *proc_data; - ompi_modex_module_data_t *module_data; - - /* make sure we could possibly have modex data */ - if (0 == strcmp(orte_gpr_base_selected_component.gpr_version.mca_component_name, - "null")) { - return OMPI_ERR_NOT_IMPLEMENTED; - } + int rc; + char * name = mca_base_component_to_string(component); - proc_data = ompi_modex_lookup_proc(proc); - if (NULL == proc_data) return OMPI_ERR_NOT_FOUND; - - OPAL_THREAD_LOCK(&proc_data->modex_lock); - - /* wait until data is available */ - while (proc_data->modex_received_data == false) { - opal_condition_wait(&proc_data->modex_cond, &proc_data->modex_lock); - } - - /* look up module */ - module_data = ompi_modex_lookup_module(proc_data, component, false); - - /* copy the data out to the user */ - if ((NULL == module_data) || - (module_data->module_data_size == 0)) { - opal_output(0, "modex recv: no module avail or zero byte size"); - *buffer = NULL; - *size = 0; - } else { - void *copy = malloc(module_data->module_data_size); - - if (copy == NULL) { - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - memcpy(copy, module_data->module_data, module_data->module_data_size); - *buffer = copy; - *size = module_data->module_data_size; - } - 
OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - - return OMPI_SUCCESS; -} - - -int -ompi_modex_recv_nb(mca_base_component_t *component, - ompi_proc_t *proc, - ompi_modex_cb_fn_t cbfunc, - void *cbdata) -{ - ompi_modex_proc_data_t *proc_data; - ompi_modex_module_data_t *module_data; - ompi_modex_cb_t *cb; - - proc_data = ompi_modex_lookup_proc(proc); - if (NULL == proc_data) return OMPI_ERR_NOT_FOUND; - - OPAL_THREAD_LOCK(&proc_data->modex_lock); - - /* lookup / create module */ - module_data = ompi_modex_lookup_module(proc_data, component, true); - if (NULL == module_data) { - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + if(NULL == name) { return OMPI_ERR_OUT_OF_RESOURCE; } - - /* register the callback */ - cb = OBJ_NEW(ompi_modex_cb_t); - cb->cbfunc = cbfunc; - cb->cbdata = cbdata; - opal_list_append(&module_data->module_cbs, &cb->super); - OPAL_THREAD_UNLOCK(&proc_data->modex_lock); - - return OMPI_SUCCESS; + + rc = orte_grpcomm.get_proc_attr(proc->proc_name, name, buffer, size); + free(name); + return rc; } - -static mca_base_component_t modex_component = { - MCA_BASE_VERSION_1_0_0, - "modex", - MCA_BASE_VERSION_1_0_0, - "", - MCA_BASE_VERSION_1_0_0, - NULL, - NULL -}; - - int ompi_modex_send_string(const char* key, const void *buffer, size_t size) { - int ret; - - OPAL_THREAD_LOCK(&ompi_modex_string_lock); - strncpy(modex_component.mca_component_name, key, - MCA_BASE_MAX_COMPONENT_NAME_LEN); - ret = ompi_modex_send(&modex_component, buffer, size); - OPAL_THREAD_UNLOCK(&ompi_modex_string_lock); - - return ret; + return orte_grpcomm.set_proc_attr(key, buffer, size); } @@ -735,13 +78,5 @@ ompi_modex_recv_string(const char* key, struct ompi_proc_t *source_proc, void **buffer, size_t *size) { - int ret; - - OPAL_THREAD_LOCK(&ompi_modex_string_lock); - strncpy(modex_component.mca_component_name, key, - MCA_BASE_MAX_COMPONENT_NAME_LEN); - ret = ompi_modex_recv(&modex_component, source_proc, buffer, size); - OPAL_THREAD_UNLOCK(&ompi_modex_string_lock); - - return ret; + 
return orte_grpcomm.get_proc_attr(source_proc->proc_name, key, buffer, size); } diff --git a/ompi/runtime/ompi_module_exchange.h b/ompi/runtime/ompi_module_exchange.h index a69006e121..021b0ef5a3 100644 --- a/ompi/runtime/ompi_module_exchange.h +++ b/ompi/runtime/ompi_module_exchange.h @@ -51,8 +51,7 @@ #include #endif -#include "orte/dss/dss_types.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/types.h" struct mca_base_component_t; struct ompi_proc_t; @@ -171,50 +170,6 @@ OMPI_DECLSPEC int ompi_modex_recv(struct mca_base_component_t *dest_component, void **buffer, size_t *size); -/** - * Non-blocking modex receive callback - * - * Prototype for non-blocking modex receive callback. - * - * @param[in] component Pointer to copy of the component struct - * @param[in] proc Peer process infromation is from - * @param[in] buffer Newly updated buffer - * @param[in] size Size (in bytes) of buffer - * @param[in] cbdata Callback data provided when non-blocking - * receive is posted - */ -typedef void (*ompi_modex_cb_fn_t)(struct mca_base_component_t *component, - struct ompi_proc_t* proc, - void* buffer, - size_t size, - void* cbdata); - - -/** - * Register to receive a callback on change to module specific data. - * - * The non-blocking version of ompi_modex_recv(). All information - * about ompi_modex_recv() applies to ompi_modex_recv_nb(), with the - * exception of what happens when data is available for the given peer - * process but not the specified module. In that case, no callback - * will be fired until data is available. 
- * - * @param[in] component A pointer to this module's component struct - * @param[in] proc Peer process to receive from - * @param[in] cbfunc Callback function when data is available, - * of type ompi_modex_cb_fn_t - * @param[in] cbdata Opaque callback data to pass to cbfunc - * - * @retval OMPI_SUCCESS Success - * @retval OMPI_ERR_OUT_OF_RESOURCE No memory could be allocated - * for internal data structures - */ -OMPI_DECLSPEC int ompi_modex_recv_nb(struct mca_base_component_t *component, - struct ompi_proc_t* proc, - ompi_modex_cb_fn_t cbfunc, - void* cbdata); - - /** * Receive a buffer from a given peer * @@ -248,66 +203,6 @@ OMPI_DECLSPEC int ompi_modex_recv_string(const char* key, void **buffer, size_t *size); -/** - * Retrieve a copy of the modex buffer - * - * Each component will "send" its data on its own. The modex - * collects that data into a local static buffer. At some point, - * we need to provide a copy of the collected info so someone - * (usually mpi_init) can send it to everyone else. This function - * xfers the payload in the local static buffer into the provided - * buffer, thus resetting the local buffer for future use. - * - * @note This function is probably not useful outside of application - * initialization code. - * - * @param[in] *buf Pointer to the target buffer - * - * @retval OMPI_SUCCESS Successfully exchanged information - * @retval OMPI_ERROR An unspecified error occurred - */ -OMPI_DECLSPEC int ompi_modex_get_my_buffer(orte_buffer_t *buf); - -/** - * Process the data in a modex buffer - * - * Given a buffer containing a set of modex entries, this - * function will destructively read the buffer, adding the - * modex info to each proc. 
An error will be returned if - * modex info is found for a proc that is not yet in the - * ompi_proc table - * - * @param[in] *buf Pointer to a buffer containing the data - * - * @retval OMPI_SUCCESS Successfully exchanged information - * @retval OMPI_ERROR An unspecified error occurred - */ -OMPI_DECLSPEC int ompi_modex_process_data(orte_buffer_t *buf); - - -/** - * Initialize the modex system - * - * Allocate memory for the local data cache and initialize the - * module exchange system. Does not cause communication nor any - * subscriptions to be placed on the registry. - * - * @retval OMPI_SUCCESS Successfully initialized modex subsystem - */ -OMPI_DECLSPEC int ompi_modex_init(void); - - -/** - * Finalize the modex system - * - * Release any memory associated with the modex system, remove all - * subscriptions on the GPR and end all non-blocking update triggers - * currently available on the system. - * - * @retval OMPI_SUCCESS Successfully shut down modex subsystem - */ -OMPI_DECLSPEC int ompi_modex_finalize(void); - END_C_DECLS #endif /* MCA_OMPI_MODULE_EXCHANGE_H */ diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index 0c2abee30b..21ee80b46c 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c @@ -35,8 +35,8 @@ #include "opal/mca/backtrace/backtrace.h" #include "orte/util/proc_info.h" #include "orte/runtime/runtime.h" -#include "orte/runtime/params.h" -#include "orte/mca/ns/ns.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/sys_info.h" #include "ompi/communicator/communicator.h" @@ -81,7 +81,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, if (!ompi_mpi_initialized || ompi_mpi_finalized) { if (orte_initialized) { - orte_errmgr.error_detected(errcode, NULL); + orte_errmgr.abort(errcode, NULL); } } @@ -148,16 +148,14 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, abort_procs = 
(orte_process_name_t*)malloc(sizeof(orte_process_name_t) * nabort_procs); if (NULL == abort_procs) { /* quick clean orte and get out */ - orte_errmgr.error_detected(errcode, - "Abort unable to malloc memory to kill procs", - NULL); + orte_errmgr.abort(errcode, "Abort unable to malloc memory to kill procs"); } /* put all the local procs in the abort list */ for (i = 0 ; i < ompi_comm_size(comm) ; ++i) { - if (ORTE_EQUAL != orte_ns.compare_fields(ORTE_NS_CMP_ALL, + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &comm->c_local_group->grp_proc_pointers[i]->proc_name, - orte_process_info.my_name)) { + ORTE_PROC_MY_NAME)) { assert(count <= nabort_procs); abort_procs[count++] = comm->c_local_group->grp_proc_pointers[i]->proc_name; } else { @@ -169,9 +167,9 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, /* if requested, kill off remote procs too */ if (kill_remote_of_intercomm) { for (i = 0 ; i < ompi_comm_remote_size(comm) ; ++i) { - if (ORTE_EQUAL != orte_ns.compare_fields(ORTE_NS_CMP_ALL, + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &comm->c_remote_group->grp_proc_pointers[i]->proc_name, - orte_process_info.my_name)) { + ORTE_PROC_MY_NAME)) { assert(count <= nabort_procs); abort_procs[count++] = comm->c_remote_group->grp_proc_pointers[i]->proc_name; @@ -183,16 +181,16 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, } if (nabort_procs > 0) { +#if 0 ret = orte_errmgr.abort_procs_request(abort_procs, nabort_procs); if (OMPI_SUCCESS != ret) { - orte_errmgr.error_detected(ret, - "Open MPI failed to abort procs as requested (%d). Exiting.", - ret, NULL); + orte_errmgr.abort(ret, "Open MPI failed to abort procs as requested (%d). Exiting.", ret); } +#endif } /* now that we've aborted everyone else, gracefully die. 
*/ - orte_errmgr.error_detected(errcode, NULL); + orte_errmgr.abort(errcode, NULL); return OMPI_SUCCESS; } diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index 2bdcf54c63..40a313a6b9 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -45,12 +45,8 @@ #include "opal/mca/carto/base/base.h" #include "orte/util/proc_info.h" -#include "orte/mca/schema/schema.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/runtime/runtime.h" @@ -66,7 +62,6 @@ #include "ompi/info/info.h" #include "ompi/runtime/mpiruntime.h" #include "ompi/attribute/attribute.h" -#include "ompi/runtime/ompi_module_exchange.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" #include "ompi/mca/osc/base/base.h" @@ -81,6 +76,8 @@ #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/runtime/params.h" #include "ompi/mca/mpool/base/mpool_base_tree.h" +#include "ompi/mca/dpm/base/base.h" +#include "ompi/mca/pubsub/base/base.h" #if OPAL_ENABLE_FT == 1 #include "ompi/mca/crcp/crcp.h" @@ -136,12 +133,6 @@ int ompi_mpi_finalize(void) MPI lifetime, to get better latency when not using TCP */ opal_progress_event_users_increment(); - /* mark that I called finalize before exiting */ - if (ORTE_SUCCESS != (ret = orte_smr.register_sync())) { - ORTE_ERROR_LOG(ret); - return ret; - } - /* If maffinity was setup, tear it down */ if (ompi_mpi_maffinity_setup) { opal_maffinity_base_close(); @@ -248,7 +239,17 @@ int ompi_mpi_finalize(void) if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) { return ret; } - + + /* finalize the pubsub functions */ + if ( OMPI_SUCCESS != (ret = ompi_pubsub_base_close())) { + return ret; + } + + /* finalize the DPM framework */ + if ( OMPI_SUCCESS != (ret = ompi_dpm_base_close())) { + return ret; + } + /* 
free internal error resources */ if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) { return ret; @@ -281,11 +282,6 @@ int ompi_mpi_finalize(void) return ret; } - /* free module exchange resources */ - if (OMPI_SUCCESS != (ret = ompi_modex_finalize())) { - return ret; - } - /* Close down MCA modules */ /* io is opened lazily, so it's only necessary to close it if it diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 038db8dec1..73e8da0a47 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -44,16 +44,12 @@ #include "orte/util/sys_info.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" +#include "orte/util/name_fns.h" #include "orte/runtime/runtime.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/gpr/gpr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/schema/schema.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/grpcomm.h" -#include "orte/runtime/params.h" +#include "orte/runtime/orte_globals.h" #include "ompi/constants.h" #include "ompi/mpi/f77/constants.h" @@ -84,6 +80,8 @@ #include "ompi/debuggers/debuggers.h" #include "ompi/proc/proc.h" #include "ompi/mca/pml/base/pml_base_bsend.h" +#include "ompi/mca/dpm/base/base.h" +#include "ompi/mca/pubsub/base/base.h" #if OPAL_ENABLE_FT == 1 #include "ompi/mca/crcp/crcp.h" @@ -228,7 +226,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_proc_t** procs; size_t nprocs; char *error = NULL; - orte_buffer_t mdx_buf, rbuf; bool timing = false; int param, value; struct timeval ompistart, ompistop; @@ -256,17 +253,11 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Setup ORTE stage 1, note that we are not infrastructre */ - if (ORTE_SUCCESS != (ret = orte_init(ORTE_NON_INFRASTRUCTURE))) { + if (ORTE_SUCCESS != (ret = orte_init(ORTE_NON_TOOL))) { error = "ompi_mpi_init: orte_init failed"; goto 
error; } - /* register myself to require that I finalize before exiting */ - if (ORTE_SUCCESS != (ret = orte_smr.register_sync())) { - error = "ompi_mpi_init: register sync failed"; - goto error; - } - /* check for timing request - get stop time and report elapsed time if so */ if (timing) { gettimeofday(&ompistop, NULL); @@ -303,7 +294,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } if (!set) { char *vpid; - orte_ns.get_vpid_string(&vpid, orte_process_info.my_name); + orte_util_convert_vpid_to_string(&vpid, ORTE_PROC_MY_NAME->vpid); opal_show_help("help-mpi-runtime", "mpi_init:startup:paffinity-unavailable", true, vpid); @@ -341,14 +332,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - /* Initialize module exchange - this MUST happen before proc_init - * as proc_init needs to send modex info! - */ - if (OMPI_SUCCESS != (ret = ompi_modex_init())) { - error = "ompi_modex_init() failed"; - goto error; - } - /* Initialize OMPI procs */ if (OMPI_SUCCESS != (ret = ompi_proc_init())) { error = "mca_proc_init() failed"; @@ -524,29 +507,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) gettimeofday(&ompistart, NULL); } - /* get the modex buffer so we can exchange it */ - OBJ_CONSTRUCT(&mdx_buf, orte_buffer_t); - if (OMPI_SUCCESS != (ret = ompi_modex_get_my_buffer(&mdx_buf))) { - error = "ompi_modex_execute() failed"; - goto error; - } - - /* execute the exchange - this function also acts as a barrier + /* exchange connection info - this function also acts as a barrier * as it will not return until the exchange is complete */ - OBJ_CONSTRUCT(&rbuf, orte_buffer_t); - if (OMPI_SUCCESS != (ret = orte_grpcomm.allgather(&mdx_buf, &rbuf))) { - error = "orte_gprcomm_allgather failed"; + if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(NULL))) { + error = "orte_grpcomm_modex failed"; goto error; } - OBJ_DESTRUCT(&mdx_buf); - - /* process the modex data into the proc structures */ - if 
(OMPI_SUCCESS != (ret = ompi_modex_process_data(&rbuf))) { - error = "ompi_modex_process_data failed"; - goto error; - } - OBJ_DESTRUCT(&rbuf); if (timing) { gettimeofday(&ompistop, NULL); @@ -680,10 +647,30 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } + /* Setup the publish/subscribe (PUBSUB) framework */ + if (OMPI_SUCCESS != (ret = ompi_pubsub_base_open())) { + error = "ompi_pubsub_base_open() failed"; + goto error; + } + if (OMPI_SUCCESS != (ret = ompi_pubsub_base_select())) { + error = "ompi_pubsub_base_select() failed"; + goto error; + } + + /* Setup the dynamic process management (DPM) framework */ + if (OMPI_SUCCESS != (ret = ompi_dpm_base_open())) { + error = "ompi_dpm_base_open() failed"; + goto error; + } + if (OMPI_SUCCESS != (ret = ompi_dpm_base_select())) { + error = "ompi_dpm_base_select() failed"; + goto error; + } + /* Check whether we have been spawned or not. We introduce that at the very end, since we need collectives, datatypes, ptls etc. up and running here.... */ - if (OMPI_SUCCESS != (ret = ompi_comm_dyn_init())) { + if (OMPI_SUCCESS != (ret = ompi_dpm.dyn_init())) { error = "ompi_comm_dyn_init() failed"; goto error; } @@ -773,7 +760,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) if (orte_debug_flag) { opal_output(0, "%s ompi_mpi_init completed", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } /* Do we need to wait for a TotalView-like debugger? 
*/ diff --git a/ompi/runtime/ompi_mpi_preconnect.c b/ompi/runtime/ompi_mpi_preconnect.c index ced88cf367..bfb02c770d 100644 --- a/ompi/runtime/ompi_mpi_preconnect.c +++ b/ompi/runtime/ompi_mpi_preconnect.c @@ -23,9 +23,12 @@ #include "ompi/communicator/communicator.h" #include "ompi/request/request.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/mca/dpm/dpm.h" + #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" + int ompi_init_preconnect_mpi(void) { @@ -140,7 +143,7 @@ ompi_init_preconnect_oob(void) ret = orte_rml.send(&procs[next]->proc_name, outmsg, 1, - ORTE_RML_TAG_WIREUP, + OMPI_RML_TAG_WIREUP, 0); if (ret < 0) return ret; } @@ -150,7 +153,7 @@ ompi_init_preconnect_oob(void) ret = orte_rml.recv(&procs[prev]->proc_name, inmsg, 1, - ORTE_RML_TAG_WIREUP, + OMPI_RML_TAG_WIREUP, 0); if (ret < 0) return ret; } diff --git a/ompi/tools/Makefile.am b/ompi/tools/Makefile.am index bea3b83803..a4bf813d7c 100644 --- a/ompi/tools/Makefile.am +++ b/ompi/tools/Makefile.am @@ -24,9 +24,11 @@ EXTRA_DIST += tools/win_makefile SUBDIRS += \ tools/ompi_info \ tools/wrappers \ - tools/ortetools + tools/ortetools \ + tools/ompi-server DIST_SUBDIRS += \ tools/ompi_info \ tools/wrappers \ - tools/ortetools + tools/ortetools \ + tools/ompi-server diff --git a/orte/tools/orteprobe/Makefile.am b/ompi/tools/ompi-server/Makefile.am similarity index 61% rename from orte/tools/orteprobe/Makefile.am rename to ompi/tools/ompi-server/Makefile.am index 16e289df4c..a43fb04cf9 100644 --- a/orte/tools/orteprobe/Makefile.am +++ b/ompi/tools/ompi-server/Makefile.am @@ -18,22 +18,23 @@ if OMPI_INSTALL_BINARIES -dist_pkgdata_DATA = help-orteprobe.txt +dist_pkgdata_DATA = help-ompi-server.txt -bin_PROGRAMS = orteprobe +bin_PROGRAMS = ompi-server endif libs = \ - $(top_builddir)/orte/libopen-rte.la + $(top_builddir)/ompi/libmpi.la \ + $(top_builddir)/opal/libopen-pal.la -orteprobe_SOURCES = \ - orteprobe.h \ - orteprobe.c +ompi_server_SOURCES = \ + ompi-server.c +ompi_server_LDADD 
= $(libs) +ompi_server_LDFLAGS = +ompi_server_DEPENDENCIES = $(libs) -orteprobe_LDADD = $(libs) -orteprobe_LDFLAGS = -orteprobe_DEPENDENCIES = $(libs) - -clean-local: - test -z "$(OMPI_CXX_TEMPLATE_REPOSITORY)" || $(RM) -rf $(OMPI_CXX_TEMPLATE_REPOSITORY) +# AM 1.9.6 seems to have a bug in its dependencies for install-man if +# dist_ and nodist_ are used, so explicitly add to EXTRA_DIST... +man_MANS = ompi-server.1 +EXTRA_DIST = $(man_MANS) diff --git a/orte/mca/gpr/replica/transition_layer/Makefile.am b/ompi/tools/ompi-server/help-ompi-server.txt similarity index 81% rename from orte/mca/gpr/replica/transition_layer/Makefile.am rename to ompi/tools/ompi-server/help-ompi-server.txt index 612dc9f9cb..4baa0241be 100644 --- a/orte/mca/gpr/replica/transition_layer/Makefile.am +++ b/ompi/tools/ompi-server/help-ompi-server.txt @@ -1,3 +1,4 @@ +# -*- text -*- # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology @@ -15,9 +16,8 @@ # # $HEADER$ # - -noinst_LTLIBRARIES = libmca_gpr_replica_tl.la -libmca_gpr_replica_tl_la_SOURCES = \ - gpr_replica_tl.h \ - gpr_replica_dict_tl.c \ - gpr_replica_segment_tl.c +# This is the US/English general help file for Open MPI's data server. +# +[ompiserver:usage] +Usage: %s [OPTION]... +Start an Open MPI data server. diff --git a/ompi/tools/ompi-server/ompi-server.1 b/ompi/tools/ompi-server/ompi-server.1 new file mode 100644 index 0000000000..32af06bb0d --- /dev/null +++ b/ompi/tools/ompi-server/ompi-server.1 @@ -0,0 +1,77 @@ +.\" +.\" Copyright (c) 2007 Los Alamos National Security, LLC +.\" All rights reserved. +.\" +.\" Man page for OMPI's ompi-server command +.\" +.\" .TH name section center-footer left-footer center-header +.TH OMPI-SERVER 1 "Dec 2007" "Open MPI" "OPEN MPI COMMANDS" +.\" ************************** +.\" Name Section +.\" ************************** +.SH NAME +. +ompi-server \- Server for supporting name publish/lookup operations. +. +.PP +. 
+.\" ************************** +.\" Synopsis Section +.\" ************************** +.SH SYNOPSIS +. +.B ompi-server +.R [ options ] +. +.\" ************************** +.\" Options Section +.\" ************************** +.SH OPTIONS +. +\fIompi-server\fR acts as a data server for Open MPI jobs to exchange +contact information in support of MPI-2's Publish_name and Lookup_name +functions. +. +.TP 10 +.B -h | --help +Display help for this command +. +. +.TP +.B -d | --debug +Enable verbose output for debugging +. +. +.TP +.B --debug-devel +Enable verbose debugging output from the Open RTE. +. +. +.TP +.B --report-uri \fR<value>\fP +Report the Open MPI contact information for the server. This information is +required for MPI jobs to use the data server. Three parameter values are supported: +(a) '-', indicating that the uri is to be printed to stdout; (b) '+', indicating that +the uri is to be printed to stderr; and (c) "file:path-to-file", indicating that +the uri is to be printed to the specified file. The "path-to-file" can be either +absolute or relative, but must be in a location where the user has write +permissions. Please note that the resulting file must be read-accessible to +expected users of the server. +. +. +.\" ************************** +.\" Description Section +.\" ************************** +.SH DESCRIPTION +. +.PP +\fIompi-server\fR acts as a data server for Open MPI jobs to exchange +contact information in support of MPI-2's Publish_name and Lookup_name +functions. +. +.\" ************************** +.\" See Also Section +.\" ************************** +. +.SH SEE ALSO +. diff --git a/ompi/tools/ompi-server/ompi-server.c b/ompi/tools/ompi-server/ompi-server.c new file mode 100644 index 0000000000..13bc9830f7 --- /dev/null +++ b/ompi/tools/ompi-server/ompi-server.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. + * Copyright (c) 2007 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_NETDB_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#include +#include +#include + + +#include "opal/event/event.h" +#include "opal/mca/base/base.h" +#include "opal/util/cmd_line.h" +#include "opal/util/output.h" +#include "opal/util/printf.h" +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/util/daemon_init.h" +#include "opal/runtime/opal.h" +#include "opal/mca/base/mca_base_param.h" + + +#include "orte/util/name_fns.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_data_server.h" + +/* + * Globals + */ + +static opal_event_t term_handler; +static opal_event_t int_handler; + +static void shutdown_callback(int fd, short flags, void *arg); + +static bool help=false; +static bool debug=false; +static bool no_daemonize=false; +static char *report_uri=NULL; + +/* + * define the context table for obtaining parameters + */ +opal_cmd_line_init_t ompi_server_cmd_line_opts[] = { + /* Various "obvious" options */ + { NULL, NULL, NULL, 'h', NULL, "help", 0, + &help, OPAL_CMD_LINE_TYPE_BOOL, + "This help message" }, + + { NULL, NULL, NULL, 'd', NULL, "debug", 0, + &debug, OPAL_CMD_LINE_TYPE_BOOL, + "Debug the 
Open MPI server" }, + + { "orte", "debug", NULL, '\0', NULL, "debug-devel", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Debug the OpenRTE" }, + + { "orte", "no_daemonize", NULL, '\0', NULL, "no-daemonize", 0, + &no_daemonize, OPAL_CMD_LINE_TYPE_BOOL, + "Don't daemonize into the background" }, + + { NULL, NULL, NULL, '\0', NULL, "report-uri", 1, + &report_uri, OPAL_CMD_LINE_TYPE_STRING, + "Report the server's uri on stdout"}, + + /* End of list */ + { NULL, NULL, NULL, '\0', NULL, NULL, 0, + NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } +}; + +int main(int argc, char *argv[]) +{ + int ret = 0; + opal_cmd_line_t *cmd_line = NULL; + char *rml_uri; + + /* init enough of opal to process cmd lines */ + if (OPAL_SUCCESS != opal_init_util()) { + fprintf(stderr, "OPAL failed to initialize -- orted aborting\n"); + exit(1); + } + + /* setup to check common command line options that just report and die */ + cmd_line = OBJ_NEW(opal_cmd_line_t); + opal_cmd_line_create(cmd_line, ompi_server_cmd_line_opts); + mca_base_cmd_line_setup(cmd_line); + if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false, + argc, argv))) { + char *args = NULL; + args = opal_cmd_line_get_usage_msg(cmd_line); + opal_show_help("help-ompi-server.txt", "ompiserver:usage", false, + argv[0], args); + free(args); + return ret; + } + + /* check for help request */ + if (help) { + char *args = NULL; + args = opal_cmd_line_get_usage_msg(cmd_line); + opal_show_help("help-ompi-server.txt", "ompiserver:usage", false, + argv[0], args); + free(args); + return 1; + } + + /* + * Since this process can now handle MCA/GMCA parameters, make sure to + * process them. 
+ */ + mca_base_cmd_line_process_args(cmd_line, &environ, &environ); + + /* register and process the orte params */ + if (ORTE_SUCCESS != (ret = orte_register_params())) { + return ret; + } + + /* detach from controlling terminal unless debugging or told not to daemonize; + * otherwise, remain attached so output can get to us + */ + if(orte_debug_flag == false && + debug == false && + no_daemonize == false) { + opal_daemon_init(NULL); + } + +#if OPAL_ENABLE_FT == 1 + /* Disable the checkpoint notification routine for this + * tool, as we will never need to checkpoint it. + * Note: This must happen before opal_init(). + */ + opal_cr_set_enabled(false); + + /* Select the none component, since we don't actually use a checkpointer */ + opal_setenv(mca_base_param_env_var("crs"), + "none", + true, &environ); + /* Mark as a tool program */ + opal_setenv(mca_base_param_env_var("opal_cr_is_tool"), + "1", + true, &environ); +#endif + + /* Perform the standard init, but flag that we are a tool + * so that we only open up the communications infrastructure. No + * session directories will be created. 
+ */ + if (ORTE_SUCCESS != (ret = orte_init(ORTE_TOOL))) { + fprintf(stderr, "ompi-server: failed to initialize -- aborting\n"); + exit(1); + } + + /* report out our URI, if we were requested to do so, using syntax + * proposed in an email thread by Jeff Squyres + */ + if (NULL != report_uri) { + rml_uri = orte_rml.get_contact_info(); + if (0 == strcmp(report_uri, "-")) { + /* if '-', then output to stdout */ + printf("%s\n", rml_uri); + } else if (0 == strcmp(report_uri, "+")) { + /* if '+', output to stderr */ + fprintf(stderr, "%s\n", rml_uri); + } else { + /* treat it as a filename and output into it */ + FILE *fp; + fp = fopen(report_uri, "w"); + if (NULL == fp) { + fprintf(stderr, "ompi-server: failed to open designated file -- aborting\n"); + orte_finalize(); + exit(1); + } + fprintf(fp, "%s\n", rml_uri); + fclose(fp); + } + free(rml_uri); + } + + /* setup the data server to listen for commands */ + if (ORTE_SUCCESS != (ret = orte_data_server_init())) { + fprintf(stderr, "ompi-server: failed to start data server -- aborting\n"); + orte_finalize(); + exit(1); + } + + /* Set signal handlers to catch kill signals so we can properly clean up + * after ourselves. + */ + opal_event_set(&term_handler, SIGTERM, OPAL_EV_SIGNAL, + shutdown_callback, NULL); + opal_event_add(&term_handler, NULL); + opal_event_set(&int_handler, SIGINT, OPAL_EV_SIGNAL, + shutdown_callback, NULL); + opal_event_add(&int_handler, NULL); + + /* We actually do *not* want the server to voluntarily yield() the + processor more than necessary. The server already blocks when + it is doing nothing, so it doesn't use any more CPU cycles than + it should; but when it *is* doing something, we do not want it + to be unnecessarily delayed because it voluntarily yielded the + processor in the middle of its work. 
+ + For example: when a message arrives at the server, we want the + OS to wake up the server in a timely fashion (which most OS's + seem good about doing) and then we want the server to process + the message as fast as possible. If the server yields and lets + aggressive MPI applications get the processor back, it may be a + long time before the OS schedules the server to run again + (particularly if there is no IO event to wake it up). Hence, + publish and lookup (for example) may be significantly delayed + before being delivered to MPI processes, which can be + problematic in some scenarios (e.g., COMM_SPAWN). */ + opal_progress_set_yield_when_idle(false); + + /* Change the default behavior of libevent such that we want to + continually block rather than blocking for the default timeout + and then looping around the progress engine again. There + should be nothing in the server that cannot block in libevent + until "something" happens (i.e., there's no need to keep + cycling through progress because the only things that should + happen will happen in libevent). This is a minor optimization, + but what the heck... :-) */ + opal_progress_set_event_flag(OPAL_EVLOOP_ONCE); + + if (debug) { + opal_output(0, "%s ompi-server: up and running!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + } + + /* wait to hear we are done */ + opal_event_dispatch(); + + /* should never get here, but if we do... 
*/ + + /* Finalize and clean up ourselves */ + if (ORTE_SUCCESS != (ret = orte_finalize())) { + ORTE_ERROR_LOG(ret); + } + return ret; +} + +static void shutdown_callback(int fd, short flags, void *arg) +{ + int ret; + + if (debug) { + opal_output(0, "%s ompi-server: finalizing", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + } + + /* Finalize and clean up ourselves */ + if (ORTE_SUCCESS != (ret = orte_finalize())) { + ORTE_ERROR_LOG(ret); + } + exit(ret); +} diff --git a/ompi/tools/ompi_info/components.cc b/ompi/tools/ompi_info/components.cc index 0b86e930c4..17ae8ffc9b 100644 --- a/ompi/tools/ompi_info/components.cc +++ b/ompi/tools/ompi_info/components.cc @@ -52,6 +52,7 @@ #include "opal/mca/crs/base/base.h" #endif #include "opal/runtime/opal.h" +#include "opal/dss/dss.h" #include "ompi/mca/allocator/allocator.h" #include "ompi/mca/allocator/base/base.h" @@ -75,6 +76,9 @@ #include "ompi/mca/topo/base/base.h" #include "ompi/mca/osc/osc.h" #include "ompi/mca/osc/base/base.h" +#include "ompi/mca/pubsub/base/base.h" +#include "ompi/mca/dpm/base/base.h" + #if OPAL_ENABLE_FT == 1 #include "ompi/mca/crcp/crcp.h" #include "ompi/mca/crcp/base/base.h" @@ -82,38 +86,26 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/base/base.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/gpr/base/base.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/iof/iof.h" #include "orte/mca/iof/base/base.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/ns/base/base.h" #include "orte/mca/oob/oob.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/odls/odls.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/ras/ras.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/rds/rds.h" -#include "orte/mca/rds/base/base.h" +#include "orte/mca/ras/base/ras_private.h" #include "orte/mca/rmaps/rmaps.h" #include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/rmgr/base/base.h" #include 
"orte/mca/rml/rml.h" #include "orte/mca/rml/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/mca/routed/base/base.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/base/base.h" -#include "orte/mca/odls/odls.h" -#include "orte/mca/odls/base/base.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/smr/base/base.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/base/base.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/plm/base/base.h" +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" #if OPAL_ENABLE_FT == 1 #include "orte/mca/snapc/snapc.h" #include "orte/mca/snapc/base/base.h" @@ -176,9 +168,9 @@ void ompi_info::open_components() // it. opal_event_init(); - // Open the DPS + // Open the DSS - if (ORTE_SUCCESS != orte_dss_open()) { + if (ORTE_SUCCESS != opal_dss_open()) { printf( "Unable to initialize the DSS\n" ); return; } @@ -193,7 +185,7 @@ void ompi_info::open_components() // Register the ORTE layer's MCA parameters - orte_register_params(false); + orte_register_params(); // Register the MPI layer's MCA parameters @@ -236,9 +228,9 @@ void ompi_info::open_components() component_map["installdirs"] = &opal_installdirs_components; // ORTE frameworks - // Set orte_process_info.seed to true to force all frameworks to + // Set orte_process_info.hnp to true to force all frameworks to // open components - orte_process_info.seed = true; + orte_process_info.hnp = true; mca_oob_base_open(); component_map["oob"] = &mca_oob_base_components; @@ -249,29 +241,17 @@ void ompi_info::open_components() orte_errmgr_base_open(); component_map["errmgr"] = &orte_errmgr_base_components_available; - orte_gpr_base_open(); - component_map["gpr"] = &orte_gpr_base_components_available; - orte_grpcomm_base_open(); component_map["grpcomm"] = &mca_grpcomm_base_components_available; orte_iof_base_open(); component_map["iof"] = &orte_iof_base.iof_components_opened; - orte_ns_base_open(); - component_map["ns"] = 
&mca_ns_base_components_available; - orte_ras_base_open(); component_map["ras"] = &orte_ras_base.ras_opened; - orte_rds_base_open(); - component_map["rds"] = &orte_rds_base.rds_components; - orte_rmaps_base_open(); - component_map["rmaps"] = &orte_rmaps_base.rmaps_opened; - - orte_rmgr_base_open(); - component_map["rmgr"] = &orte_rmgr_base.rmgr_components; + component_map["rmaps"] = &orte_rmaps_base.available_components; orte_rml_base_open(); component_map["rml"] = &orte_rml_base_components; @@ -279,17 +259,11 @@ void ompi_info::open_components() orte_routed_base_open(); component_map["routed"] = &orte_routed_base_components; - orte_pls_base_open(); - component_map["pls"] = &orte_pls_base.available_components; + orte_plm_base_open(); + component_map["plm"] = &orte_plm_base.available_components; - orte_odls_base_open(); - component_map["odls"] = &orte_odls_base.available_components; - - orte_sds_base_open(); - component_map["sds"] = &orte_sds_base_components_available; - - orte_smr_base_open(); - component_map["smr"] = &orte_smr_base.smr_components; + orte_ess_base_open(); + component_map["ess"] = &orte_ess_base_components_available; #if OPAL_ENABLE_FT == 1 orte_snapc_base_open(); @@ -335,6 +309,12 @@ void ompi_info::open_components() mca_topo_base_open(); component_map["topo"] = &mca_topo_base_components_opened; + ompi_pubsub_base_open(); + component_map["pubsub"] = &ompi_pubsub_base_components_available; + + ompi_dpm_base_open(); + component_map["dpm"] = &ompi_dpm_base_components_available; + #if OPAL_ENABLE_FT == 1 ompi_crcp_base_open(); component_map["crcp"] = &ompi_crcp_base_components_available; @@ -369,6 +349,8 @@ void ompi_info::close_components() #if OPAL_ENABLE_FT == 1 ompi_crcp_base_close(); #endif + ompi_dpm_base_close(); + ompi_pubsub_base_close(); mca_topo_base_close(); // the PML has to call the base PTL close function. 
mca_btl_base_close(); @@ -386,16 +368,11 @@ void ompi_info::close_components() #endif orte_filem_base_close(); orte_iof_base_close(); - orte_sds_base_close(); - orte_smr_base_close(); - orte_pls_base_close(); + orte_ess_base_close(); + orte_plm_base_close(); orte_odls_base_close(); - orte_rmgr_base_close(); orte_rmaps_base_close(); - orte_rds_base_close(); orte_ras_base_close(); - orte_ns_base_close(); - orte_gpr_base_close(); orte_grpcomm_base_close(); orte_errmgr_base_close(); orte_rml_base_close(); diff --git a/ompi/tools/ompi_info/ompi_info.cc b/ompi/tools/ompi_info/ompi_info.cc index 8154f8ec5a..2741a821d6 100644 --- a/ompi/tools/ompi_info/ompi_info.cc +++ b/ompi/tools/ompi_info/ompi_info.cc @@ -191,6 +191,8 @@ int main(int argc, char *argv[]) #if OPAL_ENABLE_FT == 1 ompi_info::mca_types.push_back("crs"); #endif + ompi_info::mca_types.push_back("dpm"); + ompi_info::mca_types.push_back("pubsub"); ompi_info::mca_types.push_back("allocator"); ompi_info::mca_types.push_back("coll"); @@ -209,21 +211,17 @@ int main(int argc, char *argv[]) #endif ompi_info::mca_types.push_back("errmgr"); - ompi_info::mca_types.push_back("gpr"); ompi_info::mca_types.push_back("grpcomm"); ompi_info::mca_types.push_back("iof"); - ompi_info::mca_types.push_back("ns"); ompi_info::mca_types.push_back("oob"); ompi_info::mca_types.push_back("odls"); + ompi_info::mca_types.push_back("ess"); ompi_info::mca_types.push_back("ras"); - ompi_info::mca_types.push_back("rds"); ompi_info::mca_types.push_back("rmaps"); - ompi_info::mca_types.push_back("rmgr"); ompi_info::mca_types.push_back("rml"); ompi_info::mca_types.push_back("routed"); - ompi_info::mca_types.push_back("pls"); + ompi_info::mca_types.push_back("plm"); ompi_info::mca_types.push_back("sds"); - ompi_info::mca_types.push_back("soh"); #if OPAL_ENABLE_FT == 1 ompi_info::mca_types.push_back("snapc"); #endif diff --git a/opal/Makefile.am b/opal/Makefile.am index 71187f2819..38b87f70aa 100644 --- a/opal/Makefile.am +++ b/opal/Makefile.am @@ 
-84,3 +84,4 @@ include threads/Makefile.am include mca/Makefile.am include win32/Makefile.am include tools/Makefile.am +include dss/Makefile.am diff --git a/orte/dss/Makefile.am b/opal/dss/Makefile.am similarity index 94% rename from orte/dss/Makefile.am rename to opal/dss/Makefile.am index 69e9c4aa4b..01fefd33bf 100644 --- a/orte/dss/Makefile.am +++ b/opal/dss/Makefile.am @@ -17,15 +17,14 @@ # $HEADER$ # -# This makefile.am does not stand on its own - it is included from orte/Makefile.am +# This makefile.am does not stand on its own - it is included from opal/Makefile.am headers += \ dss/dss.h \ dss/dss_types.h \ dss/dss_internal.h -noinst_LTLIBRARIES += dss/libdss.la -dss_libdss_la_SOURCES = \ +libopen_pal_la_SOURCES += \ dss/dss_internal_functions.c \ dss/dss_arith.c \ dss/dss_compare.c \ diff --git a/orte/dss/dss.h b/opal/dss/dss.h similarity index 70% rename from orte/dss/dss.h rename to opal/dss/dss.h index 47e582310a..16d18b6786 100644 --- a/orte/dss/dss.h +++ b/opal/dss/dss.h @@ -22,19 +22,16 @@ * Data packing subsystem. */ -#ifndef ORTE_DSS_H_ -#define ORTE_DSS_H_ +#ifndef OPAL_DSS_H_ +#define OPAL_DSS_H_ -#include "orte_config.h" +#include "opal_config.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" +#include "opal/types.h" -#include "orte/dss/dss_types.h" +#include "opal/dss/dss_types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /** * Set the buffer type. 
@@ -51,11 +48,11 @@ extern "C" { * * @param type The new buffer type * - * @retval ORTE_SUCCESS Operation successfully executed + * @retval OPAL_SUCCESS Operation successfully executed * - * @retval ORTE_ERROR_VALUE An appropriate error code + * @retval OPAL_ERROR_VALUE An appropriate error code */ -typedef int (*orte_dss_set_buffer_type_fn_t)(orte_buffer_t *buffer, orte_dss_buffer_type_t type); +typedef int (*opal_dss_set_buffer_type_fn_t)(opal_buffer_t *buffer, opal_dss_buffer_type_t type); /** * Top-level itnerface function to pack one or more values into a @@ -95,22 +92,22 @@ typedef int (*orte_dss_set_buffer_type_fn_t)(orte_buffer_t *buffer, orte_dss_buf * @param type The type of the data to be packed - must be one of the * DSS defined data types. * - * @retval ORTE_SUCCESS The data was packed as requested. + * @retval OPAL_SUCCESS The data was packed as requested. * - * @retval ORTE_ERROR(s) An appropriate ORTE error code indicating the + * @retval OPAL_ERROR(s) An appropriate OPAL error code indicating the * problem encountered. This error code should be handled * appropriately. * * @code - * orte_buffer_t *buffer; + * opal_buffer_t *buffer; * int32_t src; * - * status_code = orte_dss.pack(buffer, &src, 1, ORTE_INT32); + * status_code = opal_dss.pack(buffer, &src, 1, OPAL_INT32); * @endcode */ -typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_values, - orte_data_type_t type); +typedef int (*opal_dss_pack_fn_t)(opal_buffer_t *buffer, const void *src, + int32_t num_values, + opal_data_type_t type); /** * Unpack values from a buffer. @@ -141,7 +138,7 @@ typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, * same buffer. * * Warning: The caller is responsible for providing adequate memory - * storage for the requested data. The orte_dss_peek() function is + * storage for the requested data. The opal_dss_peek() function is * provided to assist in meeting this requirement. 
As noted below, the user * must provide a parameter indicating the maximum number of values that * can be unpacked into the allocated memory. If more values exist in the @@ -169,7 +166,7 @@ typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, * string in the array - the caller must only provide adequate memory * for the array of pointers. * - * @param *num A pointer to a orte_std_cntr_t value indicating the maximum + * @param *num A pointer to an int32_t value indicating the maximum * number of values that are to be unpacked, beginning at the location * pointed to by src. This is provided to help protect the caller from * memory overrun. Note that a string @@ -188,10 +185,10 @@ typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, * available, the buffer will be in an unpackable state - the dss will * return an error code to warn of this condition. * - * @retval ORTE_SUCCESS The next item in the buffer was successfully + * @retval OPAL_SUCCESS The next item in the buffer was successfully * unpacked. * - * @retval ORTE_ERROR(s) The unpack function returns an error code + * @retval OPAL_ERROR(s) The unpack function returns an error code * under one of several conditions: (a) the number of values in the * item exceeds the max num provided by the caller; (b) the type of * the next item in the buffer does not match the type specified by @@ -199,23 +196,23 @@ typedef int (*orte_dss_pack_fn_t)(orte_buffer_t *buffer, const void *src, * buffer or an attempt to read past the end of the buffer. 
* * @code - * orte_buffer_t *buffer; + * opal_buffer_t *buffer; * int32_t dest; * char **string_array; - * orte_std_cntr_t num_values; + * int32_t num_values; * * num_values = 1; - * status_code = orte_dss.unpack(buffer, (void*)&dest, &num_values, ORTE_INT32); + * status_code = opal_dss.unpack(buffer, (void*)&dest, &num_values, OPAL_INT32); * * num_values = 5; * string_array = malloc(num_values*sizeof(char *)); - * status_code = orte_dss.unpack(buffer, (void*)(string_array), &num_values, ORTE_STRING); + * status_code = opal_dss.unpack(buffer, (void*)(string_array), &num_values, OPAL_STRING); * * @endcode */ -typedef int (*orte_dss_unpack_fn_t)(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *max_num_values, - orte_data_type_t type); +typedef int (*opal_dss_unpack_fn_t)(opal_buffer_t *buffer, void *dest, + int32_t *max_num_values, + opal_data_type_t type); /** * Get the type and number of values of the next item in the buffer. @@ -227,23 +224,23 @@ typedef int (*orte_dss_unpack_fn_t)(orte_buffer_t *buffer, void *dest, * * @param buffer A pointer to the buffer in question. * - * @param type A pointer to an orte_data_type_t variable where the + * @param type A pointer to an opal_data_type_t variable where the * type of the next item in the buffer is to be stored. Caller must * have memory backing this location. * - * @param number A pointer to a orte_std_cntr_t variable where the number of + * @param number A pointer to an int32_t variable where the number of * data values in the next item is to be stored. Caller must have * memory backing this location. * - * @retval ORTE_SUCCESS Requested info was successfully returned. - * @retval ORTE_ERROR(s) An appropriate error code indicating the + * @retval OPAL_SUCCESS Requested info was successfully returned. + * @retval OPAL_ERROR(s) An appropriate error code indicating the * problem will be returned. This should be handled appropriately by * the caller. 
* */ -typedef int (*orte_dss_peek_next_item_fn_t)(orte_buffer_t *buffer, - orte_data_type_t *type, - orte_std_cntr_t *number); +typedef int (*opal_dss_peek_next_item_fn_t)(opal_buffer_t *buffer, + opal_data_type_t *type, + int32_t *number); /** * Unload the data payload from a buffer. @@ -266,24 +263,24 @@ typedef int (*orte_dss_peek_next_item_fn_t)(orte_buffer_t *buffer, * * @param size The size (in bytes) of the data payload in the buffer. * - * @retval ORTE_SUCCESS The request was succesfully completed. + * @retval OPAL_SUCCESS The request was successfully completed. * - * @retval ORTE_ERROR(s) An appropriate error code indicating the + * @retval OPAL_ERROR(s) An appropriate error code indicating the * problem will be returned. This should be handled appropriately by * the caller. * * @code - * orte_buffer_t *buffer; + * opal_buffer_t *buffer; * uint8_t *bytes; - * orte_std_cntr_t size; + * int32_t size; * - * status_code = orte_dss.unload(buffer, (void**)(&bytes), &size); + * status_code = opal_dss.unload(buffer, (void**)(&bytes), &size); * OBJ_RELEASE(buffer); * @endcode */ -typedef int (*orte_dss_unload_fn_t)(orte_buffer_t *buffer, +typedef int (*opal_dss_unload_fn_t)(opal_buffer_t *buffer, void **payload, - orte_std_cntr_t *size); + int32_t *size); /** * Load a data payload into a buffer. @@ -310,24 +307,24 @@ typedef int (*orte_dss_unload_fn_t)(orte_buffer_t *buffer, * * @param size The size (in bytes) of the provided payload. * - * @retval ORTE_SUCCESS The request was successfully completed + * @retval OPAL_SUCCESS The request was successfully completed * - * @retval ORTE_ERROR(s) An appropriate error code indicating the + * @retval OPAL_ERROR(s) An appropriate error code indicating the * problem will be returned. This should be handled appropriately by * the caller. 
* * @code - * orte_buffer_t *buffer; + * opal_buffer_t *buffer; * uint8_t bytes; - * orte_std_cntr_t size; + * int32_t size; * - * buffer = OBJ_NEW(orte_buffer_t); - * status_code = orte_dss.load(buffer, (void*)(&bytes), size); + * buffer = OBJ_NEW(opal_buffer_t); + * status_code = opal_dss.load(buffer, (void*)(&bytes), size); * @endcode */ -typedef int (*orte_dss_load_fn_t)(orte_buffer_t *buffer, +typedef int (*opal_dss_load_fn_t)(opal_buffer_t *buffer, void *payload, - orte_std_cntr_t size); + int32_t size); /** @@ -336,8 +333,8 @@ typedef int (*orte_dss_load_fn_t)(orte_buffer_t *buffer, * payload from one buffer and loads it into another. This is a destructive * action - see the unload and load descriptions above. */ -typedef int (*orte_dss_xfer_payload_fn_t)(orte_buffer_t *dest, - orte_buffer_t *src); +typedef int (*opal_dss_xfer_payload_fn_t)(opal_buffer_t *dest, + opal_buffer_t *src); /** * Copy a payload from one buffer to another @@ -350,23 +347,23 @@ typedef int (*orte_dss_xfer_payload_fn_t)(orte_buffer_t *dest, * source buffer's payload will remain intact, as will any pre-existing * payload in the destination's buffer. */ -typedef int (*orte_dss_copy_payload_fn_t)(orte_buffer_t *dest, - orte_buffer_t *src); +typedef int (*opal_dss_copy_payload_fn_t)(opal_buffer_t *dest, + opal_buffer_t *src); /** * DSS initialization function. * * In dynamic libraries, declared objects and functions don't get - * loaded until called. We need to ensure that the orte_dss function + * loaded until called. We need to ensure that the opal_dss function * structure gets loaded, so we provide an "open" call that is * executed as part of the program startup. 
*/ -ORTE_DECLSPEC int orte_dss_open(void); +OPAL_DECLSPEC int opal_dss_open(void); /** * DSS finalize function */ -ORTE_DECLSPEC int orte_dss_close(void); +OPAL_DECLSPEC int opal_dss_close(void); /** @@ -387,12 +384,12 @@ ORTE_DECLSPEC int orte_dss_close(void); * @param type The type of the data to be copied - must be one of * the DSS defined data types. * - * @retval ORTE_SUCCESS The value was successfully copied. + * @retval OPAL_SUCCESS The value was successfully copied. * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. * */ -typedef int (*orte_dss_copy_fn_t)(void **dest, void *src, orte_data_type_t type); +typedef int (*opal_dss_copy_fn_t)(void **dest, void *src, opal_data_type_t type); /** * Compare two data values. @@ -407,8 +404,8 @@ typedef int (*orte_dss_copy_fn_t)(void **dest, void *src, orte_data_type_t type) * @retval 0 Indicates two values are equal * @retval +1 Indicates second value is greater than first value */ -typedef int (*orte_dss_compare_fn_t)(void *value1, void *value2, - orte_data_type_t type); +typedef int (*opal_dss_compare_fn_t)(void *value1, void *value2, + opal_data_type_t type); /** @@ -429,11 +426,11 @@ typedef int (*orte_dss_compare_fn_t)(void *value1, void *value2, * @param type The type of the data value - must be one of * the DSS defined data types or an error will be returned. * - * @retval ORTE_SUCCESS The value was successfully copied. + * @retval OPAL_SUCCESS The value was successfully copied. * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_size_fn_t)(size_t *size, void *src, orte_data_type_t type); +typedef int (*opal_dss_size_fn_t)(size_t *size, void *src, opal_data_type_t type); /** @@ -443,11 +440,11 @@ typedef int (*orte_dss_size_fn_t)(size_t *size, void *src, orte_data_type_t type * needs some way to know how to print them (i.e., convert them to a string * representation). 
* - * @retval ORTE_SUCCESS The value was successfully printed. + * @retval OPAL_SUCCESS The value was successfully printed. * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_print_fn_t)(char **output, char *prefix, void *src, orte_data_type_t type); +typedef int (*opal_dss_print_fn_t)(char **output, char *prefix, void *src, opal_data_type_t type); /** @@ -456,11 +453,11 @@ typedef int (*orte_dss_print_fn_t)(char **output, char *prefix, void *src, orte_ * Uses the dss.print command to obtain a string version of the data value * and prints it to the designated output stream. * - * @retval ORTE_SUCCESS The value was successfully printed. + * @retval OPAL_SUCCESS The value was successfully printed. * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_dump_fn_t)(int output_stream, void *src, orte_data_type_t type); +typedef int (*opal_dss_dump_fn_t)(int output_stream, void *src, opal_data_type_t type); /** * Set a data value @@ -473,11 +470,11 @@ typedef int (*orte_dss_dump_fn_t)(int output_stream, void *src, orte_data_type_t * and type to the specified location and type. Use "copy" if you want dynamic allocation * of storage. * - * @retval ORTE_SUCCESS The value was successfully stored + * @retval OPAL_SUCCESS The value was successfully stored * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_set_fn_t)(orte_data_value_t *value, void *new_value, orte_data_type_t type); +typedef int (*opal_dss_set_fn_t)(opal_dss_value_t *value, void *new_value, opal_data_type_t type); /** * Get a data value @@ -490,12 +487,12 @@ typedef int (*orte_dss_set_fn_t)(orte_data_value_t *value, void *new_value, orte * to that of the value, after ensuring that the value's type matches the specified one. 
* Use "copy" if you want dynamic allocation of memory. * - * @retval ORTE_SUCCESS The value was successfully retrieved + * @retval OPAL_SUCCESS The value was successfully retrieved * - * @retval ORTE_ERROR(s) An appropriate error code - usually caused by the specified type + * @retval OPAL_ERROR(s) An appropriate error code - usually caused by the specified type * not matching the data type within the stored object. */ -typedef int (*orte_dss_get_fn_t)(void **data, orte_data_value_t *value, orte_data_type_t type); +typedef int (*opal_dss_get_fn_t)(void **data, opal_dss_value_t *value, opal_data_type_t type); /** * Perform an arithemetic operation on a data value @@ -504,12 +501,12 @@ typedef int (*orte_dss_get_fn_t)(void **data, orte_data_value_t *value, orte_dat * a function by which it can manipulate the data value within the data_value object. This * is the equivalent to a C++ access function. * - * @retval ORTE_SUCCESS The value was successfully retrieved + * @retval OPAL_SUCCESS The value was successfully retrieved * - * @retval ORTE_ERROR(s) An appropriate error code - usually caused by the specified type + * @retval OPAL_ERROR(s) An appropriate error code - usually caused by the specified type * not matching the data type within the stored object. */ -typedef int (*orte_dss_arith_fn_t)(orte_data_value_t *value, orte_data_value_t *operand, orte_dss_arith_op_t operation); +typedef int (*opal_dss_arith_fn_t)(opal_dss_value_t *value, opal_dss_value_t *operand, opal_dss_arith_op_t operation); /** * Increment a data value @@ -518,11 +515,11 @@ typedef int (*orte_dss_arith_fn_t)(orte_data_value_t *value, orte_data_value_t * * a function by which it can manipulate the data value within the data_value object. This * is the equivalent to a C++ access function. * - * @retval ORTE_SUCCESS The value was successfully retrieved + * @retval OPAL_SUCCESS The value was successfully retrieved * - * @retval ORTE_ERROR(s) An appropriate error code. 
+ * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_increment_fn_t)(orte_data_value_t *value); +typedef int (*opal_dss_increment_fn_t)(opal_dss_value_t *value); /** * Decrement a data value @@ -531,11 +528,11 @@ typedef int (*orte_dss_increment_fn_t)(orte_data_value_t *value); * a function by which it can manipulate the data value within the data_value object. This * is the equivalent to a C++ access function. * - * @retval ORTE_SUCCESS The value was successfully retrieved + * @retval OPAL_SUCCESS The value was successfully retrieved * - * @retval ORTE_ERROR(s) An appropriate error code. + * @retval OPAL_ERROR(s) An appropriate error code. */ -typedef int (*orte_dss_decrement_fn_t)(orte_data_value_t *value); +typedef int (*opal_dss_decrement_fn_t)(opal_dss_value_t *value); /** * Release the storage used by a data value @@ -544,21 +541,21 @@ typedef int (*orte_dss_decrement_fn_t)(orte_data_value_t *value); * a function by which it can release the storage associated with a value * stored in a data value object. */ -typedef void (*orte_dss_release_fn_t)(orte_data_value_t *value); +typedef void (*opal_dss_release_fn_t)(opal_dss_value_t *value); /** * Register a set of data handling functions. * * * This function registers a set of data type functions for a specific * type. An integer is returned that should be used a an argument to - * future invocations of orte_dss.pack(), orte_dss.unpack(), orte_dss.copy(), - * and orte_dss.compare, which + * future invocations of opal_dss.pack(), opal_dss.unpack(), opal_dss.copy(), + * and opal_dss.compare, which * will trigger calls to the appropriate functions. 
This * is most useful when extending the datatypes that the dss can - * handle; pack and unpack functions can nest calls to orte_dss.pack() - * / orte_dss.unpack(), so defining small pack/unpack functions can be + * handle; pack and unpack functions can nest calls to opal_dss.pack() + * / opal_dss.unpack(), so defining small pack/unpack functions can be * used recursively to build larger types (e.g., packing/unpacking - * structs can use calls to orte_dss.pack()/unpack() to serialize / + * structs can use calls to opal_dss.pack()/unpack() to serialize / * deserialize individual members). This is likewise true for the copy * and compare functions. * @@ -575,28 +572,38 @@ typedef void (*orte_dss_release_fn_t)(orte_data_value_t *value); * @param name [IN] String name for this pair (mainly for debugging) * @param type [OUT] Type number for this registration * - * @returns ORTE_SUCCESS upon success + * @returns OPAL_SUCCESS upon success * */ -typedef int (*orte_dss_register_fn_t)(orte_dss_pack_fn_t pack_fn, - orte_dss_unpack_fn_t unpack_fn, - orte_dss_copy_fn_t copy_fn, - orte_dss_compare_fn_t compare_fn, - orte_dss_size_fn_t size_fn, - orte_dss_print_fn_t print_fn, - orte_dss_release_fn_t release_fn, +typedef int (*opal_dss_register_fn_t)(opal_dss_pack_fn_t pack_fn, + opal_dss_unpack_fn_t unpack_fn, + opal_dss_copy_fn_t copy_fn, + opal_dss_compare_fn_t compare_fn, + opal_dss_size_fn_t size_fn, + opal_dss_print_fn_t print_fn, + opal_dss_release_fn_t release_fn, bool structured, - const char *name, orte_data_type_t *type); + const char *name, opal_data_type_t *type); /* * This function looks up the string name corresponding to the identified * data type - used for debugging messages. 
/*
 * Utility functions for specialized packing and unpacking.
 * These are useful for creating pack/unpack functions for user
 * defined types. These are NOT for general purpose use.
 *
 * pack_buffer:   takes the destination buffer, a read-only pointer to the
 *                source values, the number of values, and their data type.
 * unpack_buffer: takes the source buffer, a destination pointer, a pointer
 *                to the number of values, and the data type.
 *
 * Both return an int status code.
 */
typedef int (*opal_dss_pack_buffer_fn_t)(opal_buffer_t *buffer, const void *src,
                                         int32_t num_values, opal_data_type_t type);
typedef int (*opal_dss_unpack_buffer_fn_t)(opal_buffer_t *buffer, void *dest,
                                           int32_t *num_values,
                                           opal_data_type_t type);
/*
 * Function-pointer table for the DSS. A single instance, opal_dss, is
 * declared below; callers invoke the DSS through its members
 * (e.g. opal_dss.copy(), opal_dss.compare(), opal_dss.print()).
 */
struct opal_dss_t {
    opal_dss_set_fn_t set;
    opal_dss_get_fn_t get;
    opal_dss_arith_fn_t arith;
    opal_dss_increment_fn_t increment;
    opal_dss_decrement_fn_t decrement;              /* decrement a data value */
    opal_dss_set_buffer_type_fn_t set_buffer_type;
    opal_dss_pack_fn_t pack;
    opal_dss_unpack_fn_t unpack;
    opal_dss_copy_fn_t copy;
    opal_dss_compare_fn_t compare;
    opal_dss_size_fn_t size;
    opal_dss_print_fn_t print;
    opal_dss_release_fn_t release;                  /* release the storage used by a data value */
    opal_dss_peek_next_item_fn_t peek;
    opal_dss_unload_fn_t unload;
    opal_dss_load_fn_t load;
    opal_dss_xfer_payload_fn_t xfer_payload;
    opal_dss_copy_payload_fn_t copy_payload;
    opal_dss_register_fn_t register_type;           /* register a set of data handling functions */
    opal_dss_lookup_data_type_fn_t lookup_data_type; /* string name of a data type (debugging) */
    opal_dss_dump_data_types_fn_t dump_data_types;  /* dump the registered data type list (debugging) */
    opal_dss_dump_fn_t dump;
    opal_dss_pack_buffer_fn_t pack_buffer;          /* utility for user-defined pack functions */
    opal_dss_unpack_buffer_fn_t unpack_buffer;      /* utility for user-defined unpack functions */
};
typedef struct opal_dss_t opal_dss_t;

OPAL_DECLSPEC extern opal_dss_t opal_dss;  /* holds dss function pointers */
*/ diff --git a/opal/dss/dss_arith.c b/opal/dss/dss_arith.c new file mode 100644 index 0000000000..856cdb5506 --- /dev/null +++ b/opal/dss/dss_arith.c @@ -0,0 +1,666 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/dss/dss_internal.h" + +static void opal_dss_arith_int(int *value, int *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_uint(uint *value, uint *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_size(size_t *value, size_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_pid(pid_t *value, pid_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_byte(uint8_t *value, uint8_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_int8(int8_t *value, int8_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_int16(int16_t *value, int16_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_uint16(uint16_t *value, uint16_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_int32(int32_t *value, int32_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_uint32(uint32_t *value, uint32_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_int64(int64_t *value, int64_t *operand, opal_dss_arith_op_t operation); +static void opal_dss_arith_uint64(uint64_t *value, uint64_t *operand, opal_dss_arith_op_t 
operation); + +/* some weird ones - but somebody *might* want to do it, I suppose... */ +static void opal_dss_arith_data_type(opal_data_type_t *value, opal_data_type_t *operand, opal_dss_arith_op_t operation); + +int opal_dss_arith(opal_dss_value_t *value, opal_dss_value_t *operand, opal_dss_arith_op_t operation) +{ + /* check for error */ + if (NULL == value || NULL == operand) { + return OPAL_ERR_BAD_PARAM; + } + if (operand->type != value->type) { + return OPAL_ERR_TYPE_MISMATCH; + } + + /* Lookup the arith function for this type and call it */ + + switch(operand->type) { + case OPAL_INT: + opal_dss_arith_int((int*)value->data, (int*)operand->data, operation); + break; + + case OPAL_UINT: + opal_dss_arith_uint((uint*)value->data, (uint*)operand->data, operation); + break; + + case OPAL_SIZE: + opal_dss_arith_size((size_t*)value->data, (size_t*)operand->data, operation); + break; + + case OPAL_PID: + opal_dss_arith_pid((pid_t*)value->data, (pid_t*)operand->data, operation); + break; + + case OPAL_BYTE: + case OPAL_UINT8: + opal_dss_arith_byte((uint8_t*)value->data, (uint8_t*)operand->data, operation); + break; + + case OPAL_INT8: + opal_dss_arith_int8((int8_t*)value->data, (int8_t*)operand->data, operation); + break; + + case OPAL_INT16: + opal_dss_arith_int16((int16_t*)value->data, (int16_t*)operand->data, operation); + break; + + case OPAL_UINT16: + opal_dss_arith_uint16((uint16_t*)value->data, (uint16_t*)operand->data, operation); + break; + + case OPAL_INT32: + opal_dss_arith_int32((int32_t*)value->data, (int32_t*)operand->data, operation); + break; + + case OPAL_UINT32: + opal_dss_arith_uint32((uint32_t*)value->data, (uint32_t*)operand->data, operation); + break; + + case OPAL_INT64: + opal_dss_arith_int64((int64_t*)value->data, (int64_t*)operand->data, operation); + break; + + case OPAL_UINT64: + opal_dss_arith_uint64((uint64_t*)value->data, (uint64_t*)operand->data, operation); + break; + + default: + return OPAL_ERR_OPERATION_UNSUPPORTED; + } + + return 
OPAL_SUCCESS; +} + +int opal_dss_increment(opal_dss_value_t *value) +{ + int one; + unsigned int uone; + size_t sone; + pid_t pone; + uint8_t u8one; + int8_t i8one; + uint16_t u16one; + int16_t i16one; + uint32_t u32one; + int32_t i32one; + uint64_t u64one; + int64_t i64one; + opal_data_type_t datatypeone; + + /* check for error */ + if (NULL == value) { + return OPAL_ERR_BAD_PARAM; + } + /* Lookup the arith function for this type and call it */ + + switch(value->type) { + case OPAL_INT: + one = 1; + opal_dss_arith_int((int*)value->data, &one, OPAL_DSS_ADD); + break; + + case OPAL_UINT: + uone = 1; + opal_dss_arith_uint((uint*)value->data, &uone, OPAL_DSS_ADD); + break; + + case OPAL_SIZE: + sone = 1; + opal_dss_arith_size((size_t*)value->data, &sone, OPAL_DSS_ADD); + break; + + case OPAL_PID: + pone = 1; + opal_dss_arith_pid((pid_t*)value->data, &pone, OPAL_DSS_ADD); + break; + + case OPAL_BYTE: + case OPAL_UINT8: + u8one = 1; + opal_dss_arith_byte((uint8_t*)value->data, &u8one, OPAL_DSS_ADD); + break; + + case OPAL_INT8: + i8one = 1; + opal_dss_arith_int8((int8_t*)value->data, &i8one, OPAL_DSS_ADD); + break; + + case OPAL_INT16: + i16one = 1; + opal_dss_arith_int16((int16_t*)value->data, &i16one, OPAL_DSS_ADD); + break; + + case OPAL_UINT16: + u16one = 1; + opal_dss_arith_uint16((uint16_t*)value->data, &u16one, OPAL_DSS_ADD); + break; + + case OPAL_INT32: + i32one = 1; + opal_dss_arith_int32((int32_t*)value->data, &i32one, OPAL_DSS_ADD); + break; + + case OPAL_UINT32: + u32one = 1; + opal_dss_arith_uint32((uint32_t*)value->data, &u32one, OPAL_DSS_ADD); + break; + + case OPAL_INT64: + i64one = 1; + opal_dss_arith_int64((int64_t*)value->data, &i64one, OPAL_DSS_ADD); + break; + + case OPAL_UINT64: + u64one = 1; + opal_dss_arith_uint64((uint64_t*)value->data, &u64one, OPAL_DSS_ADD); + break; + + case OPAL_DATA_TYPE: + datatypeone = 1; + opal_dss_arith_data_type((opal_data_type_t*)value->data, &datatypeone, OPAL_DSS_ADD); + break; + + default: + return 
OPAL_ERR_OPERATION_UNSUPPORTED; + } + + return OPAL_SUCCESS; +} + +int opal_dss_decrement(opal_dss_value_t *value) +{ + int one; + unsigned int uone; + size_t sone; + pid_t pone; + uint8_t u8one; + int8_t i8one; + uint16_t u16one; + int16_t i16one; + uint32_t u32one; + int32_t i32one; + uint64_t u64one; + int64_t i64one; + opal_data_type_t datatypeone; + + /* check for error */ + if (NULL == value) { + return OPAL_ERR_BAD_PARAM; + } + /* Lookup the arith function for this type and call it */ + + switch(value->type) { + case OPAL_INT: + one = 1; + opal_dss_arith_int((int*)value->data, &one, OPAL_DSS_SUB); + break; + + case OPAL_UINT: + uone = 1; + opal_dss_arith_uint((uint*)value->data, &uone, OPAL_DSS_SUB); + break; + + case OPAL_SIZE: + sone = 1; + opal_dss_arith_size((size_t*)value->data, &sone, OPAL_DSS_SUB); + break; + + case OPAL_PID: + pone = 1; + opal_dss_arith_pid((pid_t*)value->data, &pone, OPAL_DSS_SUB); + break; + + case OPAL_BYTE: + case OPAL_UINT8: + u8one = 1; + opal_dss_arith_byte((uint8_t*)value->data, &u8one, OPAL_DSS_SUB); + break; + + case OPAL_INT8: + i8one = 1; + opal_dss_arith_int8((int8_t*)value->data, &i8one, OPAL_DSS_SUB); + break; + + case OPAL_INT16: + i16one = 1; + opal_dss_arith_int16((int16_t*)value->data, &i16one, OPAL_DSS_SUB); + break; + + case OPAL_UINT16: + u16one = 1; + opal_dss_arith_uint16((uint16_t*)value->data, &u16one, OPAL_DSS_SUB); + break; + + case OPAL_INT32: + i32one = 1; + opal_dss_arith_int32((int32_t*)value->data, &i32one, OPAL_DSS_SUB); + break; + + case OPAL_UINT32: + u32one = 1; + opal_dss_arith_uint32((uint32_t*)value->data, &u32one, OPAL_DSS_SUB); + break; + + case OPAL_INT64: + i64one = 1; + opal_dss_arith_int64((int64_t*)value->data, &i64one, OPAL_DSS_SUB); + break; + + case OPAL_UINT64: + u64one = 1; + opal_dss_arith_uint64((uint64_t*)value->data, &u64one, OPAL_DSS_SUB); + break; + + case OPAL_DATA_TYPE: + datatypeone = 1; + opal_dss_arith_data_type((opal_data_type_t*)value->data, &datatypeone, OPAL_DSS_SUB); 
+ break; + + default: + return OPAL_ERR_OPERATION_UNSUPPORTED; + } + + return OPAL_SUCCESS; +} + +/* + * NUMERIC arith FUNCTIONS + */ +static void opal_dss_arith_int(int *value, int *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_uint(uint *value, uint *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_size(size_t *value, size_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_pid(pid_t *value, pid_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_byte(uint8_t *value, uint8_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case 
OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_int8(int8_t *value, int8_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_int16(int16_t *value, int16_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_uint16(uint16_t *value, uint16_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_int32(int32_t *value, int32_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_uint32(uint32_t *value, uint32_t 
*operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_int64(int64_t *value, int64_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_uint64(uint64_t *value, uint64_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + +static void opal_dss_arith_data_type(opal_data_type_t *value, opal_data_type_t *operand, opal_dss_arith_op_t operation) +{ + switch(operation) { + case OPAL_DSS_ADD: + (*value) += *operand; + break; + + case OPAL_DSS_SUB: + (*value) -= *operand; + break; + + case OPAL_DSS_MUL: + (*value) *= *operand; + break; + + case OPAL_DSS_DIV: + if (0 == *operand) { + return; + } + (*value) /= *operand; + break; + + default: + break; + } + return; +} + diff --git a/opal/dss/dss_compare.c b/opal/dss/dss_compare.c new file mode 100644 index 0000000000..775927778b --- /dev/null +++ b/opal/dss/dss_compare.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include + +#include "opal/dss/dss_internal.h" + +int opal_dss_compare(void *value1, void *value2, opal_data_type_t type) +{ + opal_dss_type_info_t *info; + + /* check for error */ + if (NULL == value1 || NULL == value2) { + return OPAL_ERR_BAD_PARAM; + } + + /* Lookup the compare function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNKNOWN_DATA_TYPE; + } + + return info->odti_compare_fn(value1, value2, type); +} + +/* + * NUMERIC COMPARE FUNCTIONS + */ +int opal_dss_compare_int(int *value1, int *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint(unsigned int *value1, unsigned int *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_size(size_t *value1, size_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_pid(pid_t *value1, pid_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_byte(char *value1, char *value2, opal_data_type_t type) +{ + 
if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_char(char *value1, char *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_int8(int8_t *value1, int8_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint8(uint8_t *value1, uint8_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_int16(int16_t *value1, int16_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint16(uint16_t *value1, uint16_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_int32(int32_t *value1, int32_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint32(uint32_t *value1, uint32_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_int64(int64_t *value1, int64_t *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + +int opal_dss_compare_uint64(uint64_t *value1, uint64_t *value2, 
/* STRING */
/*
 * Order two NUL-terminated strings with strcmp().
 *
 * Returns OPAL_VALUE1_GREATER when value1 sorts after value2,
 * OPAL_VALUE2_GREATER when value2 sorts after value1, and OPAL_EQUAL
 * when they are identical - the same convention as the numeric
 * comparators in this file. The type argument is not used.
 *
 * Both pointers are passed straight to strcmp() and therefore must be
 * non-NULL.
 */
int opal_dss_compare_string(char *value1, char *value2, opal_data_type_t type)
{
    /* strcmp() > 0 means value1 is the greater string */
    const int order = strcmp(value1, value2);

    if (0 < order) return OPAL_VALUE1_GREATER;

    if (0 > order) return OPAL_VALUE2_GREATER;

    return OPAL_EQUAL;
}
"biggest" + */ + checksum = 0; + + for (i=0; i < value1->size; i++) { + /* protect against overflows */ + diff = value1->bytes[i] - value2->bytes[i]; + if (INT_MAX-abs(checksum)-abs(diff) < 0) { /* got an overflow condition */ + checksum = 0; + } + checksum += diff; + } + + if (0 > checksum) return OPAL_VALUE2_GREATER; /* sum of value2 bytes was greater */ + + if (0 < checksum) return OPAL_VALUE1_GREATER; /* of value1 bytes was greater */ + + return OPAL_EQUAL; /* sum of both value's bytes was identical */ +} diff --git a/opal/dss/dss_copy.c b/opal/dss/dss_copy.c new file mode 100644 index 0000000000..09dbc071ac --- /dev/null +++ b/opal/dss/dss_copy.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/dss/dss_internal.h" + +int opal_dss_copy(void **dest, void *src, opal_data_type_t type) +{ + opal_dss_type_info_t *info; + + /* check for error */ + if (NULL == dest) { + return OPAL_ERR_BAD_PARAM; + } + if (NULL == src && (OPAL_NULL != type && OPAL_STRING != type)) { + return OPAL_ERR_BAD_PARAM; + } + + /* Lookup the copy function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNKNOWN_DATA_TYPE; + } + + return info->odti_copy_fn(dest, src, type); +} + +/* + * STANDARD COPY FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED + */ +int opal_dss_std_copy(void **dest, void *src, opal_data_type_t type) +{ + size_t datasize; + uint8_t *val = NULL; + + switch(type) { + case OPAL_BOOL: + datasize = sizeof(bool); + break; + + case OPAL_INT: + case OPAL_UINT: + datasize = sizeof(int); + break; + + case OPAL_SIZE: + datasize = sizeof(size_t); + break; + + case OPAL_PID: + datasize = sizeof(pid_t); + break; + + case OPAL_BYTE: + case OPAL_INT8: + case OPAL_UINT8: + datasize = 1; + break; + + case OPAL_INT16: + case OPAL_UINT16: + datasize = 2; + break; + + case OPAL_INT32: + case OPAL_UINT32: + datasize = 4; + break; + + case OPAL_INT64: + case OPAL_UINT64: + datasize = 8; + break; + + case OPAL_DATA_TYPE: + datasize = sizeof(opal_data_type_t); + break; + + default: + return OPAL_ERR_UNKNOWN_DATA_TYPE; + } + + val = (uint8_t*)malloc(datasize); + if (NULL == val) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + memcpy(val, src, datasize); + *dest = val; + + return OPAL_SUCCESS; +} + +/* COPY FUNCTIONS FOR NON-STANDARD SYSTEM TYPES */ + +/* + * NULL + */ +int opal_dss_copy_null(char **dest, char *src, opal_data_type_t type) +{ + char *val; + + *dest = (char*)malloc(sizeof(char*)); + if (NULL == *dest) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + val = *dest; /* 
save the address of the value */ + + /* set the dest to null */ + *val = 0x00; + + return OPAL_SUCCESS; +} + +/* + * STRING + */ +int opal_dss_copy_string(char **dest, char *src, opal_data_type_t type) +{ + if (NULL == src) { /* got zero-length string/NULL pointer - store NULL */ + *dest = NULL; + } else { + *dest = strdup(src); + } + + return OPAL_SUCCESS; +} + +/* COPY FUNCTIONS FOR GENERIC OPAL TYPES */ + +/* + * OPAL_DATA_VALUE + */ +int opal_dss_copy_data_value(opal_dss_value_t **dest, opal_dss_value_t *src, + opal_data_type_t type) +{ + int rc; + + /* create the new object */ + *dest = OBJ_NEW(opal_dss_value_t); + if (NULL == *dest) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + (*dest)->type = src->type; + + /* copy the payload with its associated copy function */ + if (OPAL_SUCCESS != (rc = opal_dss.copy(&((*dest)->data), src->data, src->type))) { + OBJ_RELEASE(*dest); + return rc; + } + + return OPAL_SUCCESS; +} + + +/* + * OPAL_BYTE_OBJECT + */ +int opal_dss_copy_byte_object(opal_byte_object_t **dest, opal_byte_object_t *src, + opal_data_type_t type) +{ + /* allocate space for the new object */ + *dest = (opal_byte_object_t*)malloc(sizeof(opal_byte_object_t)); + if (NULL == *dest) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + (*dest)->size = src->size; + + /* allocate the required space for the bytes */ + (*dest)->bytes = (uint8_t*)malloc(src->size); + if (NULL == (*dest)->bytes) { + OBJ_RELEASE(*dest); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + /* copy the data across */ + memcpy((*dest)->bytes, src->bytes, src->size); + + return OPAL_SUCCESS; +} diff --git a/orte/dss/dss_dump.c b/opal/dss/dss_dump.c similarity index 57% rename from orte/dss/dss_dump.c rename to opal/dss/dss_dump.c index b793ef609b..6d356420ee 100644 --- a/orte/dss/dss_dump.c +++ b/opal/dss/dss_dump.c @@ -14,52 +14,46 @@ * $HEADER$ */ -#include "orte_config.h" -#include "orte/orte_types.h" +#include "opal_config.h" #include "opal/util/output.h" -#include "orte/mca/errmgr/errmgr.h" +#include 
"opal/dss/dss_internal.h" -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - - -int orte_dss_dump(int output_stream, void *src, orte_data_type_t type) +int opal_dss_dump(int output_stream, void *src, opal_data_type_t type) { char *sptr; int rc; - if (ORTE_SUCCESS != (rc = orte_dss.print(&sptr, NULL, src, type))) { - ORTE_ERROR_LOG(rc); + if (OPAL_SUCCESS != (rc = opal_dss.print(&sptr, NULL, src, type))) { return rc; } opal_output(output_stream, "%s", sptr); free(sptr); - return ORTE_SUCCESS; + return OPAL_SUCCESS; } -void orte_dss_dump_data_types(int output) +void opal_dss_dump_data_types(int output) { - orte_dss_type_info_t **ptr; - orte_data_type_t j; - orte_std_cntr_t i; + opal_dss_type_info_t *ptr; + opal_data_type_t j; + int32_t i; opal_output(output, "DUMP OF REGISTERED DATA TYPES"); - ptr = (orte_dss_type_info_t**)(orte_dss_types->addr); - for (i=0, j=0; j < orte_dss_num_reg_types && - i < orte_dss_types->size; i++) { - if (NULL != ptr[i]) { + j = 0; + for (i=0; i < opal_pointer_array_get_size(&opal_dss_types); i++) { + ptr = opal_pointer_array_get_item(&opal_dss_types, i); + if (NULL != ptr) { j++; /* print out the info */ opal_output(output, "\tIndex: %lu\tData type: %lu\tName: %s", (unsigned long)j, - (unsigned long)ptr[i]->odti_type, - ptr[i]->odti_name); + (unsigned long)ptr->odti_type, + ptr->odti_name); } } } diff --git a/orte/dss/dss_get.c b/opal/dss/dss_get.c similarity index 71% rename from orte/dss/dss_get.c rename to opal/dss/dss_get.c index 21698d1079..1a1a007edb 100644 --- a/orte/dss/dss_get.c +++ b/opal/dss/dss_get.c @@ -14,24 +14,16 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" +#include "opal/dss/dss_internal.h" #include "opal/util/output.h" -int orte_dss_get(void **data, orte_data_value_t *value, orte_data_type_t type) +int opal_dss_get(void **data, opal_dss_value_t *value, 
opal_data_type_t type) { /* check for error */ if (NULL == value || NULL == data) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* okay, we assume that the user has provided memory for the destination. @@ -41,13 +33,12 @@ int orte_dss_get(void **data, orte_data_value_t *value, orte_data_type_t type) * type of data being requested */ if (type != value->type) { - ORTE_ERROR_LOG(ORTE_ERR_TYPE_MISMATCH); - return ORTE_ERR_TYPE_MISMATCH; + return OPAL_ERR_TYPE_MISMATCH; } /* point the destination at the value */ *data = value->data; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/opal/dss/dss_internal.h b/opal/dss/dss_internal.h new file mode 100644 index 0000000000..f856a44ec9 --- /dev/null +++ b/opal/dss/dss_internal.h @@ -0,0 +1,477 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
/*
 * The default starting chunk size
 */
#define OPAL_DSS_DEFAULT_INITIAL_SIZE 128
/*
 * The default threshold size at which we switch from doubling the
 * buffer size to additively increasing it
 */
#define OPAL_DSS_DEFAULT_THRESHOLD_SIZE 1024
+ */ +#if SIZEOF_INT == 1 +#define DSS_TYPE_INT OPAL_INT8 +#define DSS_TYPE_UINT OPAL_UINT8 +#elif SIZEOF_INT == 2 +#define DSS_TYPE_INT OPAL_INT16 +#define DSS_TYPE_UINT OPAL_UINT16 +#elif SIZEOF_INT == 4 +#define DSS_TYPE_INT OPAL_INT32 +#define DSS_TYPE_UINT OPAL_UINT32 +#elif SIZEOF_INT == 8 +#define DSS_TYPE_INT OPAL_INT64 +#define DSS_TYPE_UINT OPAL_UINT64 +#else +#error Unsupported int size! +#endif + +/* + * Internal type corresponding to pid_t. Do not use this in interface + * calls - use OPAL_PID instead. + */ +#if SIZEOF_PID_T == 1 +#define DSS_TYPE_PID_T OPAL_UINT8 +#elif SIZEOF_PID_T == 2 +#define DSS_TYPE_PID_T OPAL_UINT16 +#elif SIZEOF_PID_T == 4 +#define DSS_TYPE_PID_T OPAL_UINT32 +#elif SIZEOF_PID_T == 8 +#define DSS_TYPE_PID_T OPAL_UINT64 +#else +#error Unsupported pid_t size! +#endif + +/* Unpack generic size macros */ +#define UNPACK_SIZE_MISMATCH(unpack_type, remote_type, ret) \ +do { \ + switch(remote_type) { \ + case OPAL_UINT8: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint8_t, remote_type); \ + break; \ + case OPAL_INT8: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int8_t, remote_type); \ + break; \ + case OPAL_UINT16: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint16_t, remote_type); \ + break; \ + case OPAL_INT16: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int16_t, remote_type); \ + break; \ + case OPAL_UINT32: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint32_t, remote_type); \ + break; \ + case OPAL_INT32: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int32_t, remote_type); \ + break; \ + case OPAL_UINT64: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint64_t, remote_type); \ + break; \ + case OPAL_INT64: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int64_t, remote_type); \ + break; \ + default: \ + ret = OPAL_ERR_NOT_FOUND; \ + } \ +} while (0) + +/* NOTE: do not need to deal with endianness here, as the unpacking of +the underlying sender-side type will do that for us. Repeat: the +data in tmpbuf[] is already in host byte order. 
*/ +#define UNPACK_SIZE_MISMATCH_FOUND(unpack_type, tmptype, tmpdsstype) \ +do { \ + int32_t i; \ + tmptype *tmpbuf = (tmptype*)malloc(sizeof(tmptype) * (*num_vals)); \ + ret = opal_dss_unpack_buffer(buffer, tmpbuf, num_vals, tmpdsstype); \ + for (i = 0 ; i < *num_vals ; ++i) { \ + ((unpack_type*) dest)[i] = (unpack_type)(tmpbuf[i]); \ + } \ + free(tmpbuf); \ +} while (0) + + +/** + * Internal struct used for holding registered dss functions + */ +struct opal_dss_type_info_t { + opal_object_t super; + /* type identifier */ + opal_data_type_t odti_type; + /** Debugging string name */ + char *odti_name; + /** Pack function */ + opal_dss_pack_fn_t odti_pack_fn; + /** Unpack function */ + opal_dss_unpack_fn_t odti_unpack_fn; + /** copy function */ + opal_dss_copy_fn_t odti_copy_fn; + /** compare function */ + opal_dss_compare_fn_t odti_compare_fn; + /** size function */ + opal_dss_size_fn_t odti_size_fn; + /** print function */ + opal_dss_print_fn_t odti_print_fn; + /** Release function */ + opal_dss_release_fn_t odti_release_fn; + /** flag to indicate structured data */ + bool odti_structured; +}; +/** + * Convenience typedef + */ +typedef struct opal_dss_type_info_t opal_dss_type_info_t; +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_dss_type_info_t); + +/* + * globals needed within dss + */ +extern bool opal_dss_initialized; +extern bool opal_dss_debug; +extern int opal_dss_verbose; +extern int opal_dss_initial_size; +extern int opal_dss_threshold_size; +extern opal_pointer_array_t opal_dss_types; +extern opal_data_type_t opal_dss_num_reg_types; + + /* + * Implementations of API functions + */ + + int opal_dss_set(opal_dss_value_t *value, void *new_value, opal_data_type_t type); + + int opal_dss_get(void **data, opal_dss_value_t *value, opal_data_type_t type); + + int opal_dss_arith(opal_dss_value_t *value, opal_dss_value_t *operand, opal_dss_arith_op_t operation); + + int opal_dss_increment(opal_dss_value_t *value); + + int opal_dss_decrement(opal_dss_value_t *value); + 
+ int opal_dss_set_buffer_type(opal_buffer_t *buffer, opal_dss_buffer_type_t type); + + int opal_dss_pack(opal_buffer_t *buffer, const void *src, + int32_t num_vals, + opal_data_type_t type); + int opal_dss_unpack(opal_buffer_t *buffer, void *dest, + int32_t *max_num_vals, + opal_data_type_t type); + + int opal_dss_copy(void **dest, void *src, opal_data_type_t type); + + int opal_dss_compare(void *value1, void *value2, + opal_data_type_t type); + + int opal_dss_print(char **output, char *prefix, void *src, opal_data_type_t type); + + int opal_dss_dump(int output_stream, void *src, opal_data_type_t type); + + int opal_dss_size(size_t *size, void *src, opal_data_type_t type); + + int opal_dss_peek(opal_buffer_t *buffer, opal_data_type_t *type, + int32_t *number); + + int opal_dss_peek_type(opal_buffer_t *buffer, opal_data_type_t *type); + + int opal_dss_unload(opal_buffer_t *buffer, void **payload, + int32_t *bytes_used); + int opal_dss_load(opal_buffer_t *buffer, void *payload, int32_t bytes_used); + + int opal_dss_xfer_payload(opal_buffer_t *dest, opal_buffer_t *src); + + int opal_dss_copy_payload(opal_buffer_t *dest, opal_buffer_t *src); + + int opal_dss_register(opal_dss_pack_fn_t pack_fn, + opal_dss_unpack_fn_t unpack_fn, + opal_dss_copy_fn_t copy_fn, + opal_dss_compare_fn_t compare_fn, + opal_dss_size_fn_t size_fn, + opal_dss_print_fn_t print_fn, + opal_dss_release_fn_t release_fn, + bool structured, + const char *name, opal_data_type_t *type); + + void opal_dss_release(opal_dss_value_t *value); + + char *opal_dss_lookup_data_type(opal_data_type_t type); + + void opal_dss_dump_data_types(int output); + + /* + * Specialized API functions + */ + int opal_dss_pack_buffer(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_unpack_buffer(opal_buffer_t *buffer, void *dst, + int32_t *num_vals, opal_data_type_t type); + + /* + * Internal pack functions + */ + + int opal_dss_pack_null(opal_buffer_t *buffer, const void 
*src, + int32_t num_vals, opal_data_type_t type); + int opal_dss_pack_byte(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_bool(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_int(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + int opal_dss_pack_int16(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + int opal_dss_pack_int32(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + int opal_dss_pack_int64(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_sizet(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_pid(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_string(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_data_type(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_data_value(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + int opal_dss_pack_byte_object(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + + /* + * Internal unpack functions + */ + + int opal_dss_unpack_null(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + int opal_dss_unpack_byte(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_bool(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_int(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + int opal_dss_unpack_int16(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + int 
opal_dss_unpack_int32(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + int opal_dss_unpack_int64(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_sizet(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_pid(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_string(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_data_type(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_data_value(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + int opal_dss_unpack_byte_object(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + + /* + * Internal copy functions + */ + + int opal_dss_std_copy(void **dest, void *src, opal_data_type_t type); + + int opal_dss_copy_null(char **dest, char *src, opal_data_type_t type); + + int opal_dss_copy_string(char **dest, char *src, opal_data_type_t type); + + int opal_dss_copy_byte_object(opal_byte_object_t **dest, opal_byte_object_t *src, + opal_data_type_t type); + + int opal_dss_copy_data_value(opal_dss_value_t **dest, opal_dss_value_t *src, + opal_data_type_t type); + /* + * Internal compare functions + */ + + int opal_dss_compare_bool(bool *value1, bool *value2, opal_data_type_t type); + + int opal_dss_compare_int(int *value1, int *value2, opal_data_type_t type); + int opal_dss_compare_uint(unsigned int *value1, unsigned int *value2, opal_data_type_t type); + + int opal_dss_compare_size(size_t *value1, size_t *value2, opal_data_type_t type); + + int opal_dss_compare_pid(pid_t *value1, pid_t *value2, opal_data_type_t type); + + int opal_dss_compare_byte(char *value1, char *value2, opal_data_type_t type); + int opal_dss_compare_char(char *value1, char *value2, opal_data_type_t type); + 
int opal_dss_compare_int8(int8_t *value1, int8_t *value2, opal_data_type_t type); + int opal_dss_compare_uint8(uint8_t *value1, uint8_t *value2, opal_data_type_t type); + + int opal_dss_compare_int16(int16_t *value1, int16_t *value2, opal_data_type_t type); + int opal_dss_compare_uint16(uint16_t *value1, uint16_t *value2, opal_data_type_t type); + + int opal_dss_compare_int32(int32_t *value1, int32_t *value2, opal_data_type_t type); + int opal_dss_compare_uint32(uint32_t *value1, uint32_t *value2, opal_data_type_t type); + + int opal_dss_compare_int64(int64_t *value1, int64_t *value2, opal_data_type_t type); + int opal_dss_compare_uint64(uint64_t *value1, uint64_t *value2, opal_data_type_t type); + + int opal_dss_compare_null(char *value1, char *value2, opal_data_type_t type); + + int opal_dss_compare_string(char *value1, char *value2, opal_data_type_t type); + + int opal_dss_compare_dt(opal_data_type_t *value1, opal_data_type_t *value2, opal_data_type_t type); + + int opal_dss_compare_data_value(opal_dss_value_t *value1, opal_dss_value_t *value2, opal_data_type_t type); + + int opal_dss_compare_byte_object(opal_byte_object_t *value1, opal_byte_object_t *value2, opal_data_type_t type); + + /* + * Internal size functions + */ + int opal_dss_std_size(size_t *size, void *src, opal_data_type_t type); + + int opal_dss_size_string(size_t *size, char *src, opal_data_type_t type); + + int opal_dss_size_data_value(size_t *size, opal_dss_value_t *src, opal_data_type_t type); + + int opal_dss_size_byte_object(size_t *size, opal_byte_object_t *src, opal_data_type_t type); + + /* + * Internal print functions + */ + int opal_dss_print_byte(char **output, char *prefix, uint8_t *src, opal_data_type_t type); + + int opal_dss_print_string(char **output, char *prefix, char *src, opal_data_type_t type); + + int opal_dss_print_size(char **output, char *prefix, size_t *src, opal_data_type_t type); + int opal_dss_print_pid(char **output, char *prefix, pid_t *src, opal_data_type_t type); 
+ int opal_dss_print_bool(char **output, char *prefix, bool *src, opal_data_type_t type); + int opal_dss_print_int(char **output, char *prefix, int *src, opal_data_type_t type); + int opal_dss_print_uint(char **output, char *prefix, int *src, opal_data_type_t type); + int opal_dss_print_uint8(char **output, char *prefix, uint8_t *src, opal_data_type_t type); + int opal_dss_print_uint16(char **output, char *prefix, uint16_t *src, opal_data_type_t type); + int opal_dss_print_uint32(char **output, char *prefix, uint32_t *src, opal_data_type_t type); + int opal_dss_print_int8(char **output, char *prefix, int8_t *src, opal_data_type_t type); + int opal_dss_print_int16(char **output, char *prefix, int16_t *src, opal_data_type_t type); + int opal_dss_print_int32(char **output, char *prefix, int32_t *src, opal_data_type_t type); +#ifdef HAVE_INT64_T + int opal_dss_print_uint64(char **output, char *prefix, uint64_t *src, opal_data_type_t type); + int opal_dss_print_int64(char **output, char *prefix, int64_t *src, opal_data_type_t type); +#else + int opal_dss_print_uint64(char **output, char *prefix, void *src, opal_data_type_t type); + int opal_dss_print_int64(char **output, char *prefix, void *src, opal_data_type_t type); +#endif + int opal_dss_print_null(char **output, char *prefix, void *src, opal_data_type_t type); + int opal_dss_print_data_type(char **output, char *prefix, opal_data_type_t *src, opal_data_type_t type); + int opal_dss_print_data_value(char **output, char *prefix, opal_dss_value_t *src, opal_data_type_t type); + int opal_dss_print_byte_object(char **output, char *prefix, opal_byte_object_t *src, opal_data_type_t type); + + + /* + * Internal release functions + */ + void opal_dss_std_release(opal_dss_value_t *value); + + void opal_dss_std_obj_release(opal_dss_value_t *value); + + void opal_dss_release_byte_object(opal_dss_value_t *value); + + /* + * Internal helper functions + */ + + char* opal_dss_buffer_extend(opal_buffer_t *bptr, size_t bytes_to_add); 
+ + bool opal_dss_too_small(opal_buffer_t *buffer, size_t bytes_reqd); + + opal_dss_type_info_t* opal_dss_find_type(opal_data_type_t type); + + int opal_dss_store_data_type(opal_buffer_t *buffer, opal_data_type_t type); + + int opal_dss_get_data_type(opal_buffer_t *buffer, opal_data_type_t *type); + +END_C_DECLS + +#endif diff --git a/orte/dss/dss_internal_functions.c b/opal/dss/dss_internal_functions.c similarity index 62% rename from orte/dss/dss_internal_functions.c rename to opal/dss/dss_internal_functions.c index 6c1a1861b7..d8e31a2850 100644 --- a/orte/dss/dss_internal_functions.c +++ b/opal/dss/dss_internal_functions.c @@ -16,28 +16,22 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" #include #ifdef HAVE_UNISTD_H #include #endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include "opal/util/output.h" +#include "opal/class/opal_pointer_array.h" -#include "orte/class/orte_pointer_array.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" +#include "opal/dss/dss_internal.h" /** * Internal function that resizes (expands) an inuse buffer if * necessary. 
*/ -char* orte_dss_buffer_extend(orte_buffer_t *buffer, size_t bytes_to_add) +char* opal_dss_buffer_extend(opal_buffer_t *buffer, size_t bytes_to_add) { size_t required, to_alloc; size_t pack_offset, unpack_offset; @@ -49,13 +43,13 @@ char* orte_dss_buffer_extend(orte_buffer_t *buffer, size_t bytes_to_add) } required = buffer->bytes_used + bytes_to_add; - if(required >= (size_t)orte_dss_threshold_size) { - to_alloc = ((required + orte_dss_threshold_size - 1) - / orte_dss_threshold_size) * orte_dss_threshold_size; + if(required >= (size_t)opal_dss_threshold_size) { + to_alloc = ((required + opal_dss_threshold_size - 1) + / opal_dss_threshold_size) * opal_dss_threshold_size; } else { to_alloc = buffer->bytes_allocated; if(0 == to_alloc) { - to_alloc = orte_dss_initial_size; + to_alloc = opal_dss_initial_size; } while(to_alloc < required) { to_alloc <<= 1; @@ -75,7 +69,6 @@ char* orte_dss_buffer_extend(orte_buffer_t *buffer, size_t bytes_to_add) } if (NULL == buffer->base_ptr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return NULL; } buffer->pack_ptr = ((char*) buffer->base_ptr) + pack_offset; @@ -91,12 +84,11 @@ char* orte_dss_buffer_extend(orte_buffer_t *buffer, size_t bytes_to_add) * Internal function that checks to see if the specified number of bytes * remain in the buffer for unpacking */ -bool orte_dss_too_small(orte_buffer_t *buffer, size_t bytes_reqd) +bool opal_dss_too_small(opal_buffer_t *buffer, size_t bytes_reqd) { size_t bytes_remaining_packed; if (buffer->pack_ptr < buffer->unpack_ptr) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE); return true; } @@ -112,39 +104,29 @@ bool orte_dss_too_small(orte_buffer_t *buffer, size_t bytes_reqd) return false; } -int orte_dss_store_data_type(orte_buffer_t *buffer, orte_data_type_t type) +int opal_dss_store_data_type(opal_buffer_t *buffer, opal_data_type_t type) { - int rc; - orte_dss_type_info_t *info; + opal_dss_type_info_t *info; - /* Lookup the pack function for the actual orte_data_type type and call it */ + /* 
Lookup the pack function for the actual opal_data_type type and call it */ - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, ORTE_DATA_TYPE_T))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, OPAL_DATA_TYPE_T))) { + return OPAL_ERR_PACK_FAILURE; } - if (ORTE_SUCCESS != (rc = info->odti_pack_fn(buffer, &type, 1, ORTE_DATA_TYPE_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; + return info->odti_pack_fn(buffer, &type, 1, OPAL_DATA_TYPE_T); } -int orte_dss_get_data_type(orte_buffer_t *buffer, orte_data_type_t *type) +int opal_dss_get_data_type(opal_buffer_t *buffer, opal_data_type_t *type) { - int rc; - orte_dss_type_info_t *info; - orte_std_cntr_t n=1; + opal_dss_type_info_t *info; + int32_t n=1; - /* Lookup the unpack function for the actual orte_data_type type and call it */ + /* Lookup the unpack function for the actual opal_data_type type and call it */ - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, ORTE_DATA_TYPE_T))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, OPAL_DATA_TYPE_T))) { + return OPAL_ERR_PACK_FAILURE; } - rc = info->odti_unpack_fn(buffer, type, &n, ORTE_DATA_TYPE_T); - - return rc; + return info->odti_unpack_fn(buffer, type, &n, OPAL_DATA_TYPE_T); } diff --git a/orte/dss/dss_load_unload.c b/opal/dss/dss_load_unload.c similarity index 73% rename from orte/dss/dss_load_unload.c rename to opal/dss/dss_load_unload.c index e4d51a553b..c2e9cb98e0 100644 --- a/orte/dss/dss_load_unload.c +++ b/opal/dss/dss_load_unload.c @@ -17,59 +17,47 @@ */ /* - * DPS Buffer Operations - */ - -/** @file: - * + * DSS Buffer Operations */ +#include "opal_config.h" -#include "orte_config.h" - -#include -#ifdef HAVE_NETINET_IN_H -#include -#endif - -#include 
"orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" +#include "opal/dss/dss_internal.h" -int orte_dss_unload(orte_buffer_t *buffer, void **payload, - orte_std_cntr_t *bytes_used) +int opal_dss_unload(opal_buffer_t *buffer, void **payload, + int32_t *bytes_used) { char *hdr_dst = NULL; - orte_dss_buffer_type_t type; + opal_dss_buffer_type_t type; /* check that buffer is not null */ if (!buffer) { - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* were we given someplace to point to the payload */ if (NULL == payload) { - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* anything in the buffer - if not, nothing to do */ if (NULL == buffer->base_ptr || 0 == buffer->bytes_used) { *payload = NULL; *bytes_used = 0; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* add room for our description of the buffer -- currently just the type */ - if (NULL == (hdr_dst = orte_dss_buffer_extend(buffer, - sizeof(orte_dss_buffer_type_t)))) { - return ORTE_ERR_OUT_OF_RESOURCE; + if (NULL == (hdr_dst = opal_dss_buffer_extend(buffer, + sizeof(opal_dss_buffer_type_t)))) { + return OPAL_ERR_OUT_OF_RESOURCE; } /* add the header (at the end, so perhaps it's a footer? 
*/ type = buffer->type; - ORTE_DSS_BUFFER_TYPE_HTON(type); - memcpy(hdr_dst, &type, sizeof(orte_dss_buffer_type_t)); - buffer->bytes_used += sizeof(orte_dss_buffer_type_t); + OPAL_DSS_BUFFER_TYPE_HTON(type); + memcpy(hdr_dst, &type, sizeof(opal_dss_buffer_type_t)); + buffer->bytes_used += sizeof(opal_dss_buffer_type_t); /* okay, we have something to provide - pass it back */ *payload = buffer->base_ptr; @@ -82,24 +70,24 @@ int orte_dss_unload(orte_buffer_t *buffer, void **payload, /* All done */ - return ORTE_SUCCESS; + return OPAL_SUCCESS; } -int orte_dss_load(orte_buffer_t *buffer, void *payload, - orte_std_cntr_t bytes_used) +int opal_dss_load(opal_buffer_t *buffer, void *payload, + int32_t bytes_used) { char *hdr_ptr; - orte_dss_buffer_type_t type; + opal_dss_buffer_type_t type; /* check to see if the buffer has been initialized */ if (NULL == buffer) { - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* check that the payload is there */ if (NULL == payload) { - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* check if buffer already has payload - free it if so */ @@ -108,11 +96,11 @@ int orte_dss_load(orte_buffer_t *buffer, void *payload, } /* get our header */ - hdr_ptr = (char*) payload + bytes_used - sizeof(orte_dss_buffer_type_t); - memcpy(&type, hdr_ptr, sizeof(orte_dss_buffer_type_t)); - ORTE_DSS_BUFFER_TYPE_NTOH(type); + hdr_ptr = (char*) payload + bytes_used - sizeof(opal_dss_buffer_type_t); + memcpy(&type, hdr_ptr, sizeof(opal_dss_buffer_type_t)); + OPAL_DSS_BUFFER_TYPE_NTOH(type); buffer->type = type; - bytes_used -= sizeof(orte_dss_buffer_type_t); + bytes_used -= sizeof(opal_dss_buffer_type_t); /* populate the buffer */ buffer->base_ptr = (char*)payload; @@ -126,7 +114,7 @@ int orte_dss_load(orte_buffer_t *buffer, void *payload, /* All done */ - return ORTE_SUCCESS; + return OPAL_SUCCESS; } @@ -139,14 +127,13 @@ int orte_dss_load(orte_buffer_t *buffer, void *payload, * looks functionally a lot more like a destructive "copy" - both for * 
the source and destination buffers - then a direct transfer of data! */ -int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src) +int opal_dss_xfer_payload(opal_buffer_t *dest, opal_buffer_t *src) { int rc; /* ensure we have valid source and destination */ if (NULL == dest || NULL == src) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* if the dest is already populated, release the data */ @@ -163,8 +150,7 @@ int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src) /* copy the src payload to the dest - this will allocate "fresh" * memory for the unpacked payload remaining in the src buffer */ - if (ORTE_SUCCESS != (rc = orte_dss_copy_payload(dest, src))) { - ORTE_ERROR_LOG(rc); + if (OPAL_SUCCESS != (rc = opal_dss_copy_payload(dest, src))) { return rc; } @@ -174,7 +160,7 @@ int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src) src->pack_ptr = src->unpack_ptr = NULL; src->bytes_allocated = src->bytes_used = 0; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } @@ -182,15 +168,14 @@ int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src) * The complete contents of the src buffer are NOT copied - only that * portion that has not been previously unpacked is copied. 
*/ -int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src) +int opal_dss_copy_payload(opal_buffer_t *dest, opal_buffer_t *src) { char *dst_ptr; - orte_std_cntr_t bytes_left; + int32_t bytes_left; /* ensure we have valid source and destination */ if (NULL == dest || NULL == src) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* if the dest is already populated, check to ensure that both @@ -198,8 +183,7 @@ int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src) */ if (0 != dest->bytes_used) { if (dest->type != src->type) { - ORTE_ERROR_LOG(ORTE_ERR_BUFFER); - return ORTE_ERR_BUFFER; + return OPAL_ERR_BUFFER; } } @@ -219,12 +203,12 @@ int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src) /* if nothing is left, then nothing to do */ if (0 == bytes_left) { - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* add room to the dest for the src buffer's payload */ - if (NULL == (dst_ptr = orte_dss_buffer_extend(dest, bytes_left))) { - return ORTE_ERR_OUT_OF_RESOURCE; + if (NULL == (dst_ptr = opal_dss_buffer_extend(dest, bytes_left))) { + return OPAL_ERR_OUT_OF_RESOURCE; } /* copy the src payload to the specified location in dest */ @@ -234,6 +218,6 @@ int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src) dest->bytes_used += bytes_left; dest->pack_ptr = ((char*)dest->pack_ptr) + bytes_left; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/orte/dss/dss_lookup.c b/opal/dss/dss_lookup.c similarity index 69% rename from orte/dss/dss_lookup.c rename to opal/dss/dss_lookup.c index a2fe8bcfa0..bcb23d3113 100644 --- a/orte/dss/dss_lookup.c +++ b/opal/dss/dss_lookup.c @@ -16,25 +16,16 @@ * $HEADER$ */ -#include "orte_config.h" -#include "orte/orte_types.h" +#include "opal_config.h" -#include "orte/mca/errmgr/errmgr.h" +#include "opal/dss/dss_internal.h" -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - - -char *orte_dss_lookup_data_type(orte_data_type_t 
type) +char *opal_dss_lookup_data_type(opal_data_type_t type) { - orte_dss_type_info_t *info; + opal_dss_type_info_t *info; char *name; - if (!(type < orte_dss_types->size)) { - return NULL; - } - - info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type); + info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type); if (NULL != info) { /* type found on list */ name = strdup(info->odti_name); return name; diff --git a/opal/dss/dss_open_close.c b/opal/dss/dss_open_close.c new file mode 100644 index 0000000000..a10cfa12da --- /dev/null +++ b/opal/dss/dss_open_close.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ +#include "opal_config.h" + +#include "opal/mca/base/mca_base_param.h" + +#include "opal/dss/dss_internal.h" + +/** + * globals + */ +bool opal_dss_initialized = false; +bool opal_dss_debug = false; +int opal_dss_verbose = -1; /* by default disabled */ +int opal_dss_initial_size; +int opal_dss_threshold_size; +opal_pointer_array_t opal_dss_types; +opal_data_type_t opal_dss_num_reg_types; +opal_dss_buffer_type_t default_buf_type; + +opal_dss_t opal_dss = { + opal_dss_set, + opal_dss_get, + opal_dss_arith, + opal_dss_increment, + opal_dss_decrement, + opal_dss_set_buffer_type, + opal_dss_pack, + opal_dss_unpack, + opal_dss_copy, + opal_dss_compare, + opal_dss_size, + opal_dss_print, + opal_dss_release, + opal_dss_peek, + opal_dss_unload, + opal_dss_load, + opal_dss_xfer_payload, + opal_dss_copy_payload, + opal_dss_register, + opal_dss_lookup_data_type, + opal_dss_dump_data_types, + opal_dss_dump, + opal_dss_pack_buffer, + opal_dss_unpack_buffer +}; + +/** + * Object constructors, destructors, and instantiations + */ +/** Data Value **/ +/* constructor - used to initialize state of data value instance */ +static void opal_data_value_construct(opal_dss_value_t* ptr) +{ + ptr->type = OPAL_UNDEF; + ptr->data = NULL; +} +/* destructor - used to release data value instance */ +static void opal_data_value_destruct(opal_dss_value_t* ptr) +{ + if (NULL != ptr->data) { + opal_dss.release(ptr); + } +} + +/* define instance of opal_class_t */ +OBJ_CLASS_INSTANCE( + opal_dss_value_t, /* type name */ + opal_object_t, /* parent "class" name */ + opal_data_value_construct, /* constructor */ + opal_data_value_destruct); /* destructor */ + + +static void opal_buffer_construct (opal_buffer_t* buffer) +{ + /** set the default buffer type */ + buffer->type = default_buf_type; + + /* Make everything NULL to begin with */ + + buffer->base_ptr = buffer->pack_ptr = buffer->unpack_ptr = NULL; + 
buffer->bytes_allocated = buffer->bytes_used = 0; +} + +static void opal_buffer_destruct (opal_buffer_t* buffer) +{ + if (NULL != buffer) { + if (NULL != buffer->base_ptr) { + free (buffer->base_ptr); + } + } +} + +OBJ_CLASS_INSTANCE(opal_buffer_t, + opal_object_t, + opal_buffer_construct, + opal_buffer_destruct); + + +static void opal_dss_type_info_construct(opal_dss_type_info_t *obj) +{ + obj->odti_name = NULL; + obj->odti_pack_fn = NULL; + obj->odti_unpack_fn = NULL; + obj->odti_copy_fn = NULL; + obj->odti_compare_fn = NULL; + obj->odti_size_fn = NULL; + obj->odti_print_fn = NULL; + obj->odti_release_fn = NULL; + obj->odti_structured = false; +} + +static void opal_dss_type_info_destruct(opal_dss_type_info_t *obj) +{ + if (NULL != obj->odti_name) { + free(obj->odti_name); + } +} + +OBJ_CLASS_INSTANCE(opal_dss_type_info_t, opal_object_t, + opal_dss_type_info_construct, + opal_dss_type_info_destruct); + + +int opal_dss_open(void) +{ + char *enviro_val; + int id, rc; + opal_data_type_t tmp; + int def_type; + + if (opal_dss_initialized) { + return OPAL_SUCCESS; + } + + enviro_val = getenv("OPAL_dss_debug"); + if (NULL != enviro_val) { /* debug requested */ + opal_dss_debug = true; + } else { + opal_dss_debug = false; + } + + /** set the default buffer type. If we are in debug mode, then we default + * to fully described buffers. Otherwise, we default to non-described for brevity + * and performance + */ +#if OMPI_ENABLE_DEBUG + def_type = OPAL_DSS_BUFFER_FULLY_DESC; +#else + def_type = OPAL_DSS_BUFFER_NON_DESC; +#endif + + id = mca_base_param_register_int("dss", "buffer", "type", + "Set the default mode for OpenRTE buffers (0=non-described, 1=described)", + def_type); + mca_base_param_lookup_int(id, &rc); + default_buf_type = rc; + + /* setup the initial size of the buffer. 
*/ + id = mca_base_param_register_int("dss", "buffer_initial", "size", NULL, + OPAL_DSS_DEFAULT_INITIAL_SIZE); + mca_base_param_lookup_int(id, &opal_dss_initial_size); + + /* the threshold as to where to stop doubling the size of the buffer + * allocated memory and start doing additive increases */ + id = mca_base_param_register_int("dss", "buffer_threshold", "size", NULL, + OPAL_DSS_DEFAULT_THRESHOLD_SIZE); + mca_base_param_lookup_int(id, &opal_dss_threshold_size); + + /* Setup the types array */ + OBJ_CONSTRUCT(&opal_dss_types, opal_pointer_array_t); + if (OPAL_SUCCESS != (rc = opal_pointer_array_init(&opal_dss_types, + OPAL_DSS_ID_DYNAMIC, + OPAL_DSS_ID_MAX, + OPAL_DSS_ID_MAX))) { + return rc; + } + opal_dss_num_reg_types = 0; + + /* Register all the intrinsic types */ + + tmp = OPAL_NULL; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_null, + opal_dss_unpack_null, + (opal_dss_copy_fn_t)opal_dss_copy_null, + (opal_dss_compare_fn_t)opal_dss_compare_null, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_null, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_NULL", &tmp))) { + return rc; + } + tmp = OPAL_BYTE; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_byte, + opal_dss_unpack_byte, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_byte, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_byte, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_BYTE", &tmp))) { + return rc; + } + tmp = OPAL_BOOL; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_bool, + opal_dss_unpack_bool, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_bool, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_bool, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_BOOL", &tmp))) { + return rc; + } + tmp = 
OPAL_INT; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int, + opal_dss_unpack_int, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT", &tmp))) { + return rc; + } + tmp = OPAL_UINT; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int, + opal_dss_unpack_int, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT", &tmp))) { + return rc; + } + tmp = OPAL_INT8; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_byte, + opal_dss_unpack_byte, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int8, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int8, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT8", &tmp))) { + return rc; + } + tmp = OPAL_UINT8; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_byte, + opal_dss_unpack_byte, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint8, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint8, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT8", &tmp))) { + return rc; + } + tmp = OPAL_INT16; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int16, + opal_dss_unpack_int16, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int16, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int16, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT16", &tmp))) { + return rc; 
+ } + tmp = OPAL_UINT16; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int16, + opal_dss_unpack_int16, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint16, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint16, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT16", &tmp))) { + return rc; + } + tmp = OPAL_INT32; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int32, + opal_dss_unpack_int32, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int32, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int32, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT32", &tmp))) { + return rc; + } + tmp = OPAL_UINT32; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int32, + opal_dss_unpack_int32, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint32, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint32, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT32", &tmp))) { + return rc; + } + tmp = OPAL_INT64; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int64, + opal_dss_unpack_int64, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_int64, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_int64, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_INT64", &tmp))) { + return rc; + } + tmp = OPAL_UINT64; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_int64, + opal_dss_unpack_int64, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_uint64, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_uint64, + (opal_dss_release_fn_t)opal_dss_std_release, + 
OPAL_DSS_UNSTRUCTURED, + "OPAL_UINT64", &tmp))) { + return rc; + } + tmp = OPAL_SIZE; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_sizet, + opal_dss_unpack_sizet, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_size, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_size, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_SIZE", &tmp))) { + return rc; + } + tmp = OPAL_PID; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_pid, + opal_dss_unpack_pid, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_pid, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_pid, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_PID", &tmp))) { + return rc; + } + tmp = OPAL_STRING; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_string, + opal_dss_unpack_string, + (opal_dss_copy_fn_t)opal_dss_copy_string, + (opal_dss_compare_fn_t)opal_dss_compare_string, + (opal_dss_size_fn_t)opal_dss_size_string, + (opal_dss_print_fn_t)opal_dss_print_string, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_STRUCTURED, + "OPAL_STRING", &tmp))) { + return rc; + } + tmp = OPAL_DATA_TYPE; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_data_type, + opal_dss_unpack_data_type, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_dt, + (opal_dss_size_fn_t)opal_dss_std_size, + (opal_dss_print_fn_t)opal_dss_print_data_type, + (opal_dss_release_fn_t)opal_dss_std_release, + OPAL_DSS_UNSTRUCTURED, + "OPAL_DATA_TYPE", &tmp))) { + return rc; + } + tmp = OPAL_DATA_VALUE; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_data_value, + opal_dss_unpack_data_value, + (opal_dss_copy_fn_t)opal_dss_copy_data_value, + (opal_dss_compare_fn_t)opal_dss_compare_data_value, + (opal_dss_size_fn_t)opal_dss_size_data_value, + 
(opal_dss_print_fn_t)opal_dss_print_data_value, + (opal_dss_release_fn_t)opal_dss_std_obj_release, + OPAL_DSS_STRUCTURED, + "OPAL_DATA_VALUE", &tmp))) { + return rc; + } + + tmp = OPAL_BYTE_OBJECT; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_byte_object, + opal_dss_unpack_byte_object, + (opal_dss_copy_fn_t)opal_dss_copy_byte_object, + (opal_dss_compare_fn_t)opal_dss_compare_byte_object, + (opal_dss_size_fn_t)opal_dss_size_byte_object, + (opal_dss_print_fn_t)opal_dss_print_byte_object, + (opal_dss_release_fn_t)opal_dss_release_byte_object, + OPAL_DSS_STRUCTURED, + "OPAL_BYTE_OBJECT", &tmp))) { + return rc; + } + + /* All done */ + + return OPAL_SUCCESS; +} + + +int opal_dss_close(void) +{ + int32_t i; + + opal_dss_initialized = false; + + for (i = 0 ; i < opal_pointer_array_get_size(&opal_dss_types) ; ++i) { + opal_dss_type_info_t *info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, i); + if (NULL != info) { + OBJ_RELEASE(info); + } + } + + OBJ_DESTRUCT(&opal_dss_types); + + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_pack.c b/opal/dss/dss_pack.c new file mode 100644 index 0000000000..02f5d4aed3 --- /dev/null +++ b/opal/dss/dss_pack.c @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/types.h" +#include "opal/util/output.h" +#include "opal/dss/dss_internal.h" + +int opal_dss_pack(opal_buffer_t *buffer, const void *src, int32_t num_vals, + opal_data_type_t type) +{ + int rc; + + /* check for error */ + if (NULL == buffer) { + return OPAL_ERR_BAD_PARAM; + } + + /* Pack the number of values */ + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + if (OPAL_SUCCESS != (rc = opal_dss_store_data_type(buffer, OPAL_INT32))) { + return rc; + } + } + if (OPAL_SUCCESS != (rc = opal_dss_pack_int32(buffer, &num_vals, 1, OPAL_INT32))) { + return rc; + } + + /* Pack the value(s) */ + return opal_dss_pack_buffer(buffer, src, num_vals, type); +} + +int opal_dss_pack_buffer(opal_buffer_t *buffer, const void *src, int32_t num_vals, + opal_data_type_t type) +{ + int rc; + opal_dss_type_info_t *info; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_buffer( %p, %p, %lu, %d )\n", + (void*)buffer, src, (long unsigned int)num_vals, (int)type ) ); + + /* Pack the declared data type */ + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + if (OPAL_SUCCESS != (rc = opal_dss_store_data_type(buffer, type))) { + return rc; + } + } + + /* Lookup the pack function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_PACK_FAILURE; + } + + return info->odti_pack_fn(buffer, src, num_vals, type); +} + + +/* PACK FUNCTIONS FOR GENERIC SYSTEM TYPES */ + +/* + * BOOL + */ +int opal_dss_pack_bool(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. If we aren't fully described, then add the + description for this type... 
*/ + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, DSS_TYPE_BOOL))) { + return ret; + } + } + + /* Turn around and pack the real type */ + return opal_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_BOOL); +} + +/* + * INT + */ +int opal_dss_pack_int(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. If we aren't fully described, then add the + description for this type... */ + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, DSS_TYPE_INT))) { + return ret; + } + } + + /* Turn around and pack the real type */ + return opal_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_INT); +} + +/* + * SIZE_T + */ +int opal_dss_pack_sizet(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. If we aren't fully described, then add the + description for this type... */ + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, DSS_TYPE_SIZE_T))) { + return ret; + } + } + + return opal_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_SIZE_T); +} + +/* + * PID_T + */ +int opal_dss_pack_pid(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. If we aren't fully described, then add the + description for this type... 
*/ + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, DSS_TYPE_PID_T))) { + return ret; + } + } + + /* Turn around and pack the real type */ + return opal_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_PID_T); +} + + +/* PACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ + +/* + * NULL + */ +int opal_dss_pack_null(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + char null=0x00; + char *dst; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_null * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, num_vals))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + /* store the nulls */ + memset(dst, (int)null, num_vals); + + /* update buffer pointers */ + buffer->pack_ptr += num_vals; + buffer->bytes_used += num_vals; + + return OPAL_SUCCESS; +} + +/* + * BYTE, CHAR, INT8 + */ +int opal_dss_pack_byte(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + char *dst; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_byte * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, num_vals))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + /* store the data */ + memcpy(dst, src, num_vals); + + /* update buffer pointers */ + buffer->pack_ptr += num_vals; + buffer->bytes_used += num_vals; + + return OPAL_SUCCESS; +} + +/* + * INT16 + */ +int opal_dss_pack_int16(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int32_t i; + uint16_t tmp, *srctmp = (uint16_t*) src; + char *dst; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_int16 * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, num_vals*sizeof(tmp)))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (i = 0; i < num_vals; ++i) { + tmp = htons(srctmp[i]); + 
memcpy(dst, &tmp, sizeof(tmp)); + dst += sizeof(tmp); + } + buffer->pack_ptr += num_vals * sizeof(tmp); + buffer->bytes_used += num_vals * sizeof(tmp); + + return OPAL_SUCCESS; +} + +/* + * INT32 + */ +int opal_dss_pack_int32(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int32_t i; + uint32_t tmp, *srctmp = (uint32_t*) src; + char *dst; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_int32 * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, num_vals*sizeof(tmp)))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (i = 0; i < num_vals; ++i) { + tmp = htonl(srctmp[i]); + memcpy(dst, &tmp, sizeof(tmp)); + dst += sizeof(tmp); + } + buffer->pack_ptr += num_vals * sizeof(tmp); + buffer->bytes_used += num_vals * sizeof(tmp); + + return OPAL_SUCCESS; +} + +/* + * INT64 + */ +int opal_dss_pack_int64(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int32_t i; + uint64_t tmp, *srctmp = (uint64_t*) src; + char *dst; + size_t bytes_packed = num_vals * sizeof(tmp); + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_pack_int64 * %d\n", num_vals ) ); + /* check to see if buffer needs extending */ + if (NULL == (dst = opal_dss_buffer_extend(buffer, bytes_packed))) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (i = 0; i < num_vals; ++i) { + tmp = hton64(srctmp[i]); + memcpy(dst, &tmp, sizeof(tmp)); + dst += sizeof(tmp); + } + buffer->pack_ptr += bytes_packed; + buffer->bytes_used += bytes_packed; + + return OPAL_SUCCESS; +} + +/* + * STRING + */ +int opal_dss_pack_string(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret = OPAL_SUCCESS; + int32_t i, len; + char **ssrc = (char**) src; + + for (i = 0; i < num_vals; ++i) { + if (NULL == ssrc[i]) { /* got zero-length string/NULL pointer - store NULL */ + len = 0; + if (OPAL_SUCCESS != (ret = opal_dss_pack_int32(buffer, &len, 1, OPAL_INT32))) 
{ + return ret; + } + } else { + len = (int32_t)strlen(ssrc[i]) + 1; + if (OPAL_SUCCESS != (ret = opal_dss_pack_int32(buffer, &len, 1, OPAL_INT32))) { + return ret; + } + if (OPAL_SUCCESS != (ret = + opal_dss_pack_byte(buffer, ssrc[i], len, OPAL_BYTE))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} + +/* PACK FUNCTIONS FOR GENERIC OPAL TYPES */ + +/* + * OPAL_DATA_TYPE + */ +int opal_dss_pack_data_type(opal_buffer_t *buffer, const void *src, int32_t num_vals, + opal_data_type_t type) +{ + int ret; + + /* Turn around and pack the real type */ + if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, src, num_vals, OPAL_DATA_TYPE_T))) { + } + + return ret; +} + +/* + * OPAL_DATA_VALUE + */ +int opal_dss_pack_data_value(opal_buffer_t *buffer, const void *src, int32_t num, opal_data_type_t type) +{ + opal_dss_type_info_t *info; + opal_dss_value_t **sdv; + int32_t i; + int ret; + + sdv = (opal_dss_value_t **) src; + + for (i = 0; i < num; ++i) { + /* if the src data value is NULL, then we will pack it as OPAL_NULL to indicate + * that the unpack should leave a NULL data value + */ + if (NULL == sdv[i]) { + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, OPAL_NULL))) { + return ret; + } + continue; + } + + /* pack the data type - we'll need it on the other end */ + if (OPAL_SUCCESS != (ret = opal_dss_store_data_type(buffer, sdv[i]->type))) { + return ret; + } + + /* if the data type is UNDEF, then nothing more to do */ + if (OPAL_UNDEF == sdv[i]->type) continue; + + /* Lookup the pack function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, sdv[i]->type))) { + return OPAL_ERR_PACK_FAILURE; + } + + if (info->odti_structured) { + if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, &(sdv[i]->data), 1, sdv[i]->type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, sdv[i]->data, 1, sdv[i]->type))) { + return ret; + } + } + } + + return 
OPAL_SUCCESS; +} + +/* + * OPAL_BYTE_OBJECT + */ +int opal_dss_pack_byte_object(opal_buffer_t *buffer, const void *src, int32_t num, + opal_data_type_t type) +{ + opal_byte_object_t **sbyteptr; + int32_t i, n; + int ret; + + sbyteptr = (opal_byte_object_t **) src; + + for (i = 0; i < num; ++i) { + n = sbyteptr[i]->size; + if (OPAL_SUCCESS != (ret = opal_dss_pack_int32(buffer, &n, 1, OPAL_INT32))) { + return ret; + } + if (0 < n) { + if (OPAL_SUCCESS != (ret = + opal_dss_pack_byte(buffer, sbyteptr[i]->bytes, n, OPAL_BYTE))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} diff --git a/orte/dss/dss_peek.c b/opal/dss/dss_peek.c similarity index 52% rename from orte/dss/dss_peek.c rename to opal/dss/dss_peek.c index 9688a4a34f..78ec5f38cb 100644 --- a/orte/dss/dss_peek.c +++ b/opal/dss/dss_peek.c @@ -16,115 +16,101 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include "orte/mca/errmgr/errmgr.h" +#include "opal/dss/dss_internal.h" -#include "orte/dss/dss_internal.h" - - -int orte_dss_peek(orte_buffer_t *buffer, orte_data_type_t *type, - orte_std_cntr_t *num_vals) +int opal_dss_peek(opal_buffer_t *buffer, opal_data_type_t *type, + int32_t *num_vals) { int ret; - orte_buffer_t tmp; - orte_std_cntr_t n=1; - orte_data_type_t local_type; + opal_buffer_t tmp; + int32_t n=1; + opal_data_type_t local_type; /* check for errors */ if (buffer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* Double check and ensure that there is data left in the buffer. 
*/ if (buffer->unpack_ptr >= buffer->base_ptr + buffer->bytes_used) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER); - *type = ORTE_NULL; + *type = OPAL_NULL; *num_vals = 0; - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; } /* if this is NOT a fully described buffer, then that is as much as * we can do - there is no way we can tell the caller what type is * in the buffer since that info wasn't stored. */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - *type = ORTE_UNDEF; + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + *type = OPAL_UNDEF; *num_vals = 0; - return ORTE_ERR_UNKNOWN_DATA_TYPE; + return OPAL_ERR_UNKNOWN_DATA_TYPE; } /* cheat: unpack from a copy of the buffer -- leaving all the original pointers intact */ tmp = *buffer; - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(&tmp, &local_type))) { - ORTE_ERROR_LOG(ret); - *type = ORTE_NULL; + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(&tmp, &local_type))) { + *type = OPAL_NULL; *num_vals = 0; return ret; } - if (ORTE_STD_CNTR != local_type) { /* if the length wasn't first, then error */ - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE); - *type = ORTE_NULL; + if (OPAL_INT32 != local_type) { /* if the length wasn't first, then error */ + *type = OPAL_NULL; *num_vals = 0; - return ORTE_ERR_UNPACK_FAILURE; + return OPAL_ERR_UNPACK_FAILURE; } - if (ORTE_SUCCESS != (ret = orte_dss_unpack_std_cntr(&tmp, num_vals, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - *type = ORTE_NULL; + if (OPAL_SUCCESS != (ret = opal_dss_unpack_int32(&tmp, num_vals, &n, OPAL_INT32))) { + *type = OPAL_NULL; *num_vals = 0; return ret; } - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(&tmp, type))) { - ORTE_ERROR_LOG(ret); - *type = ORTE_NULL; + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(&tmp, type))) { + *type = OPAL_NULL; *num_vals = 0; } return ret; } -int orte_dss_peek_type(orte_buffer_t *buffer, 
orte_data_type_t *type) +int opal_dss_peek_type(opal_buffer_t *buffer, opal_data_type_t *type) { int ret; - orte_buffer_t tmp; + opal_buffer_t tmp; /* check for errors */ if (buffer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* if this is NOT a fully described buffer, then there isn't anything * we can do - there is no way we can tell the caller what type is * in the buffer since that info wasn't stored. */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - *type = ORTE_UNDEF; - return ORTE_ERR_UNKNOWN_DATA_TYPE; + if (OPAL_DSS_BUFFER_FULLY_DESC != buffer->type) { + *type = OPAL_UNDEF; + return OPAL_ERR_UNKNOWN_DATA_TYPE; } /* Double check and ensure that there is data left in the buffer. */ if (buffer->unpack_ptr >= buffer->base_ptr + buffer->bytes_used) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER); - *type = ORTE_UNDEF; - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + *type = OPAL_UNDEF; + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; } /* cheat: unpack from a copy of the buffer -- leaving all the original pointers intact */ tmp = *buffer; - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(&tmp, type))) { - ORTE_ERROR_LOG(ret); - *type = ORTE_UNDEF; + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(&tmp, type))) { + *type = OPAL_UNDEF; return ret; } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/opal/dss/dss_print.c b/opal/dss/dss_print.c new file mode 100644 index 0000000000..e17f86075d --- /dev/null +++ b/opal/dss/dss_print.c @@ -0,0 +1,461 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include + +#include "opal/dss/dss_internal.h" + +int opal_dss_print(char **output, char *prefix, void *src, opal_data_type_t type) +{ + opal_dss_type_info_t *info; + + /* check for error */ + if (NULL == output) { + return OPAL_ERR_BAD_PARAM; + } + + /* Lookup the print function for this type and call it */ + + if(NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNKNOWN_DATA_TYPE; + } + + return info->odti_print_fn(output, prefix, src, type); +} + +/* + * STANDARD PRINT FUNCTIONS FOR SYSTEM TYPES + */ +int opal_dss_print_byte(char **output, char *prefix, uint8_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_BYTE\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_BYTE\tValue: %x", prefix, *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_string(char **output, char *prefix, char *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_STRING\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_STRING\tValue: %s", prefx, src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_size(char **output, char *prefix, size_t *src, opal_data_type_t type) +{ + char *prefx; + + 
/* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_SIZE\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_SIZE\tValue: %lu", prefx, (unsigned long) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_pid(char **output, char *prefix, pid_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_PID\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_PID\tValue: %lu", prefx, (unsigned long) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_bool(char **output, char *prefix, bool *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_BOOL\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_BOOL\tValue: %s", prefx, *src ? 
"TRUE" : "FALSE"); + + return OPAL_SUCCESS; +} + +int opal_dss_print_int(char **output, char *prefix, int *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_INT\tValue: %ld", prefx, (long) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_uint(char **output, char *prefix, int *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_UINT\tValue: %lu", prefx, (unsigned long) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_uint8(char **output, char *prefix, uint8_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT8\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_UINT8\tValue: %u", prefx, (unsigned int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_uint16(char **output, char *prefix, uint16_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT16\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: 
OPAL_UINT16\tValue: %u", prefx, (unsigned int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_uint32(char **output, char *prefix, uint32_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT32\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_UINT32\tValue: %u", prefx, (unsigned int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_int8(char **output, char *prefix, int8_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT8\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_INT8\tValue: %d", prefx, (int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_int16(char **output, char *prefix, int16_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT16\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_INT16\tValue: %d", prefx, (int) *src); + + return OPAL_SUCCESS; +} + +int opal_dss_print_int32(char **output, char *prefix, int32_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT32\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + 
asprintf(output, "%sData type: OPAL_INT32\tValue: %d", prefx, (int) *src); + + return OPAL_SUCCESS; +} +int opal_dss_print_uint64(char **output, char *prefix, +#ifdef HAVE_INT64_T + uint64_t *src, +#else + void *src, +#endif /* HAVE_INT64_T */ + opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_UINT64\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + +#ifdef HAVE_INT64_T + asprintf(output, "%sData type: OPAL_UINT64\tValue: %lu", prefx, (unsigned long) *src); +#else + asprintf(output, "%sData type: OPAL_UINT64\tValue: unsupported", prefx); +#endif /* HAVE_INT64_T */ + + return OPAL_SUCCESS; +} + +int opal_dss_print_int64(char **output, char *prefix, +#ifdef HAVE_INT64_T + int64_t *src, +#else + void *src, +#endif /* HAVE_INT64_T */ + opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_INT64\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + +#ifdef HAVE_INT64_T + asprintf(output, "%sData type: OPAL_INT64\tValue: %ld", prefx, (long) *src); +#else + asprintf(output, "%sData type: OPAL_INT64\tValue: unsupported", prefx); +#endif /* HAVE_INT64_T */ + + return OPAL_SUCCESS; +} + +int opal_dss_print_null(char **output, char *prefix, void *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_NULL\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_NULL", prefx); + + return OPAL_SUCCESS; +} + + +/* 
PRINT FUNCTIONS FOR GENERIC OPAL TYPES */ + +/* + * OPAL_DATA_TYPE + */ +int opal_dss_print_data_type(char **output, char *prefix, opal_data_type_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_DATA_TYPE\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_DATA_TYPE\tValue: %lu", prefx, (unsigned long) *src); + return OPAL_SUCCESS; +} + +/* + * OPAL_DATA_VALUE + */ +int opal_dss_print_data_value(char **output, char *prefix, opal_dss_value_t *src, opal_data_type_t type) +{ + char *pfx, *tmp1, *tmp2; + int rc; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + if (NULL != prefix) { + asprintf(output, "%sData type: OPAL_DATA_VALUE\tValue: NULL pointer", prefix); + } else { + asprintf(output, "Data type: OPAL_DATA_VALUE\tValue: NULL pointer"); + } + return OPAL_SUCCESS; + } + + if (NULL != prefix) { + asprintf(&pfx, "%s\t", prefix); + asprintf(&tmp1, "%sData type: OPAL_DATA_VALUE:\n", prefix); + } else { + asprintf(&tmp1, "Data type: OPAL_DATA_VALUE:\n"); + asprintf(&pfx, "\t"); + } + + /* if data is included, print it */ + if (OPAL_UNDEF == src->type) { /* undefined data type - just report it */ + asprintf(&tmp2, "%sData type: OPAL_UNDEF\tValue: N/A", pfx); + } else if (NULL != src->data) { + if (OPAL_SUCCESS != (rc = opal_dss.print(&tmp2, pfx, src->data, src->type))) { + if (NULL != tmp1) free(tmp1); + if (NULL != pfx) free(pfx); + *output = NULL; + return rc; + } + } else { /* indicate the data field was NULL */ + asprintf(&tmp2, "%sData field is NULL", pfx); + } + + asprintf(output, "%s%s", tmp1, tmp2); + free(tmp1); + free(tmp2); + if (NULL != pfx) free(pfx); + + return OPAL_SUCCESS; +} + +/* + * OPAL_BYTE_OBJECT + */ +int opal_dss_print_byte_object(char **output, char 
*prefix, opal_byte_object_t *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_BYTE_OBJECT\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_BYTE_OBJECT\tSize: %lu", prefx, (unsigned long) src->size); + + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_register.c b/opal/dss/dss_register.c new file mode 100644 index 0000000000..323b860193 --- /dev/null +++ b/opal/dss/dss_register.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/dss/dss_internal.h" + +int opal_dss_register(opal_dss_pack_fn_t pack_fn, + opal_dss_unpack_fn_t unpack_fn, + opal_dss_copy_fn_t copy_fn, + opal_dss_compare_fn_t compare_fn, + opal_dss_size_fn_t size_fn, + opal_dss_print_fn_t print_fn, + opal_dss_release_fn_t release_fn, + bool structured, + const char *name, opal_data_type_t *type) +{ + opal_dss_type_info_t *info, *ptr; + int32_t i; + + /* Check for bozo cases */ + + if (NULL == pack_fn || NULL == unpack_fn || NULL == copy_fn || NULL == compare_fn || + NULL == size_fn || NULL == print_fn || NULL == name || NULL == type) { + return OPAL_ERR_BAD_PARAM; + } + + /* check if this entry already exists - if so, error - we do NOT allow multiple type registrations */ + for (i=0; i < opal_pointer_array_get_size(&opal_dss_types); i++) { + ptr = opal_pointer_array_get_item(&opal_dss_types, i); + if (NULL != ptr) { + /* check if the name exists */ + if (0 == strcmp(ptr->odti_name, name)) { + return OPAL_ERR_DATA_TYPE_REDEF; + } + /* check if the specified type exists */ + if (*type > 0 && ptr->odti_type == *type) { + return OPAL_ERR_DATA_TYPE_REDEF; + } + } + } + + /* if type is given (i.e., *type > 0), then just use it. 
+ * otherwise, it is an error + */ + if (0 >= *type) { + return OPAL_ERR_BAD_PARAM; + } + + /* Add a new entry to the table */ + info = (opal_dss_type_info_t*) OBJ_NEW(opal_dss_type_info_t); + if (NULL == info) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + info->odti_type = *type; + info->odti_name = strdup(name); + info->odti_pack_fn = pack_fn; + info->odti_unpack_fn = unpack_fn; + info->odti_copy_fn = copy_fn; + info->odti_compare_fn = compare_fn; + info->odti_size_fn = size_fn; + info->odti_print_fn = print_fn; + info->odti_release_fn = release_fn; + info->odti_structured = structured; + + return opal_pointer_array_set_item(&opal_dss_types, *type, info); +} diff --git a/orte/dss/dss_release.c b/opal/dss/dss_release.c similarity index 62% rename from orte/dss/dss_release.c rename to opal/dss/dss_release.c index 1c118c44f0..2614a22c11 100644 --- a/orte/dss/dss_release.c +++ b/opal/dss/dss_release.c @@ -16,33 +16,23 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" +#include "opal/dss/dss_internal.h" #include "opal/util/output.h" -void orte_dss_release(orte_data_value_t *value) +void opal_dss_release(opal_dss_value_t *value) { - orte_dss_type_info_t *info = NULL; + opal_dss_type_info_t *info = NULL; /* check for error */ if (NULL == value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return; } /* Lookup the release function for this type and call it */ - if (!(value->type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, value->type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, value->type))) { return; } @@ -52,7 +42,7 @@ void orte_dss_release(orte_data_value_t *value) /* * STANDARD RELEASE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED */ -void 
orte_dss_std_release(orte_data_value_t *value) +void opal_dss_std_release(opal_dss_value_t *value) { free(value->data); value->data = NULL; @@ -61,20 +51,20 @@ void orte_dss_std_release(orte_data_value_t *value) /* * STANDARD OBJECT RELEASE FUNCTION - WORKS FOR EVERYTHING */ -void orte_dss_std_obj_release(orte_data_value_t *value) +void opal_dss_std_obj_release(opal_dss_value_t *value) { OBJ_RELEASE(value->data); } /* - * ORTE_BYTE_OBJECT + * OPAL_BYTE_OBJECT */ -void orte_dss_release_byte_object(orte_data_value_t *value) +void opal_dss_release_byte_object(opal_dss_value_t *value) { - orte_byte_object_t *bo; + opal_byte_object_t *bo; - bo = (orte_byte_object_t*)value->data; + bo = (opal_byte_object_t*)value->data; free(bo->bytes); free(value->data); diff --git a/orte/dss/dss_set.c b/opal/dss/dss_set.c similarity index 65% rename from orte/dss/dss_set.c rename to opal/dss/dss_set.c index b7af15e68b..26a7592ac1 100644 --- a/orte/dss/dss_set.c +++ b/opal/dss/dss_set.c @@ -14,26 +14,15 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif +#include "opal/dss/dss_internal.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - -int orte_dss_set(orte_data_value_t *value, void *new_value, orte_data_type_t type) +int opal_dss_set(opal_dss_value_t *value, void *new_value, opal_data_type_t type) { /* check for error */ if (NULL == value || NULL == new_value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* see if a value is already loaded - if so, that's just wrong. 
We can't @@ -41,8 +30,7 @@ int orte_dss_set(orte_data_value_t *value, void *new_value, orte_data_type_t typ was stored dynamically */ if (NULL != value->data) { - ORTE_ERROR_LOG(ORTE_ERR_DATA_OVERWRITE_ATTEMPT); - return ORTE_ERR_DATA_OVERWRITE_ATTEMPT; + return OPAL_ERR_DATA_OVERWRITE_ATTEMPT; } /* set the type */ @@ -51,6 +39,6 @@ int orte_dss_set(orte_data_value_t *value, void *new_value, orte_data_type_t typ /* point the value to the data object */ value->data = new_value; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/orte/dss/dss_set_buffer_type.c b/opal/dss/dss_set_buffer_type.c similarity index 63% rename from orte/dss/dss_set_buffer_type.c rename to opal/dss/dss_set_buffer_type.c index 664e2bd208..de29626e6f 100644 --- a/orte/dss/dss_set_buffer_type.c +++ b/opal/dss/dss_set_buffer_type.c @@ -14,37 +14,25 @@ * $HEADER$ */ -#include "orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif +#include "opal/dss/dss_internal.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - -int orte_dss_set_buffer_type(orte_buffer_t *buffer, orte_dss_buffer_type_t type) +int opal_dss_set_buffer_type(opal_buffer_t *buffer, opal_dss_buffer_type_t type) { /** check for error */ if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /** see if the buffer is empty - if not, generate error */ if (buffer->base_ptr != buffer->pack_ptr) { - ORTE_ERROR_LOG(ORTE_ERR_BUFFER); - return ORTE_ERR_BUFFER; + return OPAL_ERR_BUFFER; } /** set the type */ buffer->type = type; - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/orte/dss/dss_size.c b/opal/dss/dss_size.c similarity index 50% rename from orte/dss/dss_size.c rename to opal/dss/dss_size.c index fd55c0a386..c132900f57 100644 --- a/orte/dss/dss_size.c +++ b/opal/dss/dss_size.c @@ -16,112 +16,83 @@ * $HEADER$ */ -#include 
"orte_config.h" +#include "opal_config.h" -#include -#if HAVE_NETINET_IN_H -#include -#endif +#include "opal/dss/dss_internal.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" - -int orte_dss_size(size_t *size, void *src, orte_data_type_t type) +int opal_dss_size(size_t *size, void *src, opal_data_type_t type) { - int rc; - orte_dss_type_info_t *info; + opal_dss_type_info_t *info; /* check for error */ if (NULL == size) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* Lookup the size function for this type and call it */ - if (!(type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNKNOWN_DATA_TYPE; } - if (ORTE_SUCCESS != (rc = info->odti_size_fn(size, src, type))) { - ORTE_ERROR_LOG(rc); - } - - - return rc; + return info->odti_size_fn(size, src, type); } /* * STANDARD SIZE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED */ -int orte_dss_std_size(size_t *size, void *src, orte_data_type_t type) +int opal_dss_std_size(size_t *size, void *src, opal_data_type_t type) { switch(type) { - case ORTE_BOOL: + case OPAL_BOOL: *size = sizeof(bool); break; - case ORTE_INT: - case ORTE_UINT: + case OPAL_INT: + case OPAL_UINT: *size = sizeof(int); break; - case ORTE_SIZE: + case OPAL_SIZE: *size = sizeof(size_t); break; - case ORTE_PID: + case OPAL_PID: *size = sizeof(pid_t); break; - case ORTE_BYTE: - case ORTE_INT8: - case ORTE_UINT8: - case ORTE_NULL: + case OPAL_BYTE: + case OPAL_INT8: + case OPAL_UINT8: + case OPAL_NULL: *size = 1; break; - case ORTE_INT16: - case ORTE_UINT16: + case OPAL_INT16: + case OPAL_UINT16: *size = sizeof(uint16_t); break; - case ORTE_INT32: - case ORTE_UINT32: + 
case OPAL_INT32: + case OPAL_UINT32: *size = sizeof(uint32_t); break; - case ORTE_INT64: - case ORTE_UINT64: + case OPAL_INT64: + case OPAL_UINT64: *size = sizeof(uint64_t); break; - case ORTE_STD_CNTR: - *size = sizeof(orte_std_cntr_t); + case OPAL_DATA_TYPE: + *size = sizeof(opal_data_type_t); break; - case ORTE_DATA_TYPE: - *size = sizeof(orte_data_type_t); - break; - -#if OPAL_ENABLE_FT == 1 - case ORTE_CKPT_CMD: - *size = sizeof(size_t); - break; -#endif - default: *size = 0; - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; + return OPAL_ERR_UNKNOWN_DATA_TYPE; } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* SIZE FUNCTIONS FOR NON-STANDARD SYSTEM TYPES */ @@ -129,7 +100,7 @@ int orte_dss_std_size(size_t *size, void *src, orte_data_type_t type) /* * STRING */ -int orte_dss_size_string(size_t *size, char *src, orte_data_type_t type) +int opal_dss_size_string(size_t *size, char *src, opal_data_type_t type) { if (NULL != src) { *size = strlen(src) + 1; @@ -137,48 +108,47 @@ int orte_dss_size_string(size_t *size, char *src, orte_data_type_t type) *size = sizeof(char*); /* account for NULL */ } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } -/* SIZE FUNCTIONS FOR GENERIC ORTE TYPES */ +/* SIZE FUNCTIONS FOR GENERIC OPAL TYPES */ /* - * ORTE_DATA_VALUE + * OPAL_DATA_VALUE */ -int orte_dss_size_data_value(size_t *size, orte_data_value_t *src, orte_data_type_t type) +int opal_dss_size_data_value(size_t *size, opal_dss_value_t *src, opal_data_type_t type) { size_t data_size; int rc; /* account for size of object itself... 
*/ - *size = sizeof(orte_data_value_t); + *size = sizeof(opal_dss_value_t); if (NULL != src) { /* ...and the number of bytes in the payload, IF an actual object was provided */ - if (ORTE_SUCCESS != (rc = orte_dss.size(&data_size, src->data, src->type))) { - ORTE_ERROR_LOG(rc); + if (OPAL_SUCCESS != (rc = opal_dss.size(&data_size, src->data, src->type))) { return rc; } *size += data_size; } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } /* - * ORTE_BYTE_OBJECT + * OPAL_BYTE_OBJECT */ -int orte_dss_size_byte_object(size_t *size, orte_byte_object_t *src, orte_data_type_t type) +int opal_dss_size_byte_object(size_t *size, opal_byte_object_t *src, opal_data_type_t type) { /* account for size of object itself... */ - *size = sizeof(orte_byte_object_t); + *size = sizeof(opal_byte_object_t); if (NULL != src) { /* ...and the number of bytes in the payload, IF an actual object was provided */ *size += src->size; } - return ORTE_SUCCESS; + return OPAL_SUCCESS; } diff --git a/opal/dss/dss_types.h b/opal/dss/dss_types.h new file mode 100644 index 0000000000..2722c7fa20 --- /dev/null +++ b/opal/dss/dss_types.h @@ -0,0 +1,147 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Buffer management types. 
+ */ + +#ifndef OPAL_DSS_TYPES_H_ +#define OPAL_DSS_TYPES_H_ + +#include "opal_config.h" +#include "opal/types.h" + +#include "opal/class/opal_object.h" + +BEGIN_C_DECLS + +typedef uint8_t opal_data_type_t; /** data type indicators */ +#define OPAL_DATA_TYPE_T OPAL_UINT8 +#define OPAL_DSS_ID_MAX UINT8_MAX +#define OPAL_DSS_ID_INVALID OPAL_DSS_ID_MAX + +/* define a structure to hold generic byte objects */ +typedef struct { + int32_t size; + uint8_t *bytes; +} opal_byte_object_t; + +/* Type defines for packing and unpacking */ +#define OPAL_UNDEF (opal_data_type_t) 0 /**< type hasn't been defined yet */ +#define OPAL_BYTE (opal_data_type_t) 1 /**< a byte of data */ +#define OPAL_BOOL (opal_data_type_t) 2 /**< boolean */ +#define OPAL_STRING (opal_data_type_t) 3 /**< a NULL terminated string */ +#define OPAL_SIZE (opal_data_type_t) 4 /**< the generic size_t */ +#define OPAL_PID (opal_data_type_t) 5 /**< process pid */ + /* all the integer flavors */ +#define OPAL_INT (opal_data_type_t) 6 /**< generic integer */ +#define OPAL_INT8 (opal_data_type_t) 7 /**< an 8-bit integer */ +#define OPAL_INT16 (opal_data_type_t) 8 /**< a 16-bit integer */ +#define OPAL_INT32 (opal_data_type_t) 9 /**< a 32-bit integer */ +#define OPAL_INT64 (opal_data_type_t) 10 /**< a 64-bit integer */ + /* all the unsigned integer flavors */ +#define OPAL_UINT (opal_data_type_t) 11 /**< generic unsigned integer */ +#define OPAL_UINT8 (opal_data_type_t) 12 /**< an 8-bit unsigned integer */ +#define OPAL_UINT16 (opal_data_type_t) 13 /**< a 16-bit unsigned integer */ +#define OPAL_UINT32 (opal_data_type_t) 14 /**< a 32-bit unsigned integer */ +#define OPAL_UINT64 (opal_data_type_t) 15 /**< a 64-bit unsigned integer */ + /* we don't support floating point types */ + /* General types */ +#define OPAL_BYTE_OBJECT (opal_data_type_t) 16 /**< byte object structure */ +#define OPAL_DATA_TYPE (opal_data_type_t) 17 /**< data type */ +#define OPAL_NULL (opal_data_type_t) 18 /**< don't interpret data type */ 
+#define OPAL_DATA_VALUE (opal_data_type_t) 19 /**< data value */ + +#define OPAL_DSS_ID_DYNAMIC (opal_data_type_t) 20 + +/* define the results values for comparisons so we can change them in only one place */ +#define OPAL_VALUE1_GREATER +1 +#define OPAL_VALUE2_GREATER -1 +#define OPAL_EQUAL 0 + +/* define arithmetic operations for readability */ +typedef uint8_t opal_dss_arith_op_t; + +#define OPAL_DSS_ADD 1 +#define OPAL_DSS_SUB 2 +#define OPAL_DSS_MUL 3 +#define OPAL_DSS_DIV 4 + + +/* Data value object */ +typedef struct { + opal_object_t super; /* required for this to be an object */ + opal_data_type_t type; /* the type of value stored */ + void *data; +} opal_dss_value_t; +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_dss_value_t); + +#define OPAL_DATA_VALUE_EMPTY { OPAL_OBJ_STATIC_INIT(opal_dss_value_t), OPAL_UNDEF, NULL} + +/* structured-unstructured data flags */ +#define OPAL_DSS_STRUCTURED true +#define OPAL_DSS_UNSTRUCTURED false + +/** + * buffer type + */ +typedef uint8_t opal_dss_buffer_type_t; +#define OPAL_DSS_BUFFER_NON_DESC 0x00 +#define OPAL_DSS_BUFFER_FULLY_DESC 0x01 + +#define OPAL_DSS_BUFFER_TYPE_HTON(h); +#define OPAL_DSS_BUFFER_TYPE_NTOH(h); + +/** + * Structure for holding a buffer to be used with the RML or OOB + * subsystems. 
+ */ + struct opal_buffer_t { + /** First member must be the object's parent */ + opal_object_t parent; + /** type of buffer */ + opal_dss_buffer_type_t type; + /** Start of my memory */ + char *base_ptr; + /** Where the next data will be packed to (within the allocated + memory starting at base_ptr) */ + char *pack_ptr; + /** Where the next data will be unpacked from (within the + allocated memory starting as base_ptr) */ + char *unpack_ptr; + + /** Number of bytes allocated (starting at base_ptr) */ + size_t bytes_allocated; + /** Number of bytes used by the buffer (i.e., amount of data -- + including overhead -- packed in the buffer) */ + size_t bytes_used; + }; + /** + * Convenience typedef + */ + typedef struct opal_buffer_t opal_buffer_t; + + /** formalize the declaration */ + OPAL_DECLSPEC OBJ_CLASS_DECLARATION (opal_buffer_t); + +END_C_DECLS + +#endif /* OPAL_DSS_TYPES_H */ diff --git a/opal/dss/dss_unpack.c b/opal/dss/dss_unpack.c new file mode 100644 index 0000000000..79602fef12 --- /dev/null +++ b/opal/dss/dss_unpack.c @@ -0,0 +1,510 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/util/output.h" +#include "opal/dss/dss_internal.h" + +int opal_dss_unpack(opal_buffer_t *buffer, void *dst, int32_t *num_vals, + opal_data_type_t type) +{ + int rc, ret; + int32_t local_num, n=1; + opal_data_type_t local_type; + + /* check for error */ + if (NULL == buffer || NULL == dst || NULL == num_vals) { + return OPAL_ERR_BAD_PARAM; + } + + /* if user provides a zero for num_vals, then there is no storage allocated + * so return an appropriate error + */ + if (0 == *num_vals) { + return OPAL_ERR_UNPACK_INADEQUATE_SPACE; + } + + /** Unpack the declared number of values + * REMINDER: it is possible that the buffer is corrupted and that + * the DSS will *think* there is a proper int32_t variable at the + * beginning of the unpack region - but that the value is bogus (e.g., just + * a byte field in a string array that so happens to have a value that + * matches the int32_t data type flag). Therefore, this error check is + * NOT completely safe. This is true for ALL unpack functions, not just + * int32_t as used here. + */ + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + if (OPAL_SUCCESS != ( + rc = opal_dss_get_data_type(buffer, &local_type))) { + *num_vals = 0; + return rc; + } + if (OPAL_INT32 != local_type) { /* if the length wasn't first, then error */ + *num_vals = 0; + return OPAL_ERR_UNPACK_FAILURE; + } + } + + n=1; + if (OPAL_SUCCESS != (rc = opal_dss_unpack_int32(buffer, &local_num, &n, OPAL_INT32))) { + *num_vals = 0; + return rc; + } + + /** if the storage provided is inadequate, set things up + * to unpack as much as we can and to return an error code + * indicating that everything was not unpacked - the buffer + * is left in a state where it can not be further unpacked. 
+ */ + if (local_num > *num_vals) { + local_num = *num_vals; + ret = OPAL_ERR_UNPACK_INADEQUATE_SPACE; + } else { /** enough or more than enough storage */ + *num_vals = local_num; /** let the user know how many we actually unpacked */ + ret = OPAL_SUCCESS; + } + + /** Unpack the value(s) */ + if (OPAL_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dst, &local_num, type))) { + *num_vals = 0; + ret = rc; + } + + return ret; +} + +int opal_dss_unpack_buffer(opal_buffer_t *buffer, void *dst, int32_t *num_vals, + opal_data_type_t type) +{ + int rc; + opal_data_type_t local_type; + opal_dss_type_info_t *info; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_buffer( %p, %p, %lu, %d )\n", + (void*)buffer, dst, (long unsigned int)*num_vals, (int)type ) ); + + /** Unpack the declared data type */ + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + if (OPAL_SUCCESS != (rc = opal_dss_get_data_type(buffer, &local_type))) { + return rc; + } + /* if the data types don't match, then return an error */ + if (type != local_type) { + return OPAL_ERR_PACK_MISMATCH; + } + } + + /* Lookup the unpack function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, type))) { + return OPAL_ERR_UNPACK_FAILURE; + } + + return info->odti_unpack_fn(buffer, dst, num_vals, type); +} + + +/* UNPACK GENERIC SYSTEM TYPES */ + +/* + * BOOL + */ +int opal_dss_unpack_bool(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + opal_data_type_t remote_type; + + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + /* see what type was actually packed */ + if (OPAL_SUCCESS != (ret = opal_dss_peek_type(buffer, &remote_type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &remote_type))) { + return ret; + } + } + + if (remote_type == DSS_TYPE_BOOL) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if 
(OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_BOOL))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(bool, remote_type, ret); + } + return ret; +} + +/* + * INT + */ +int opal_dss_unpack_int(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + opal_data_type_t remote_type; + + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + /* see what type was actually packed */ + if (OPAL_SUCCESS != (ret = opal_dss_peek_type(buffer, &remote_type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &remote_type))) { + return ret; + } + } + + if (remote_type == DSS_TYPE_INT) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_INT))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(int, remote_type, ret); + } + + return ret; +} + +/* + * SIZE_T + */ +int opal_dss_unpack_sizet(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + opal_data_type_t remote_type; + + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + /* see what type was actually packed */ + if (OPAL_SUCCESS != (ret = opal_dss_peek_type(buffer, &remote_type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &remote_type))) { + return ret; + } + } + + if (remote_type == DSS_TYPE_SIZE_T) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_SIZE_T))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(size_t, remote_type, ret); + } + + return ret; +} + +/* + * PID_T + */ +int opal_dss_unpack_pid(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, 
opal_data_type_t type) +{ + int ret; + opal_data_type_t remote_type; + + if (OPAL_DSS_BUFFER_FULLY_DESC == buffer->type) { + /* see what type was actually packed */ + if (OPAL_SUCCESS != (ret = opal_dss_peek_type(buffer, &remote_type))) { + return ret; + } + } else { + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &remote_type))) { + return ret; + } + } + + if (remote_type == DSS_TYPE_PID_T) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_PID_T))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(pid_t, remote_type, ret); + } + + return ret; +} + + +/* UNPACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ + +/* + * NULL + */ +int opal_dss_unpack_null(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_null * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, *num_vals)) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + memcpy(dest, buffer->unpack_ptr, *num_vals); + + /* update buffer pointer */ + buffer->unpack_ptr += *num_vals; + + return OPAL_SUCCESS; +} + +/* + * BYTE, CHAR, INT8 + */ +int opal_dss_unpack_byte(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_byte * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, *num_vals)) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + memcpy(dest, buffer->unpack_ptr, *num_vals); + + /* update buffer pointer */ + buffer->unpack_ptr += *num_vals; + + return OPAL_SUCCESS; +} + +int opal_dss_unpack_int16(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int32_t i; + 
uint16_t tmp, *desttmp = (uint16_t*) dest; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_int16 * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); + desttmp[i] = ntohs(tmp); + buffer->unpack_ptr += sizeof(tmp); + } + + return OPAL_SUCCESS; +} + +int opal_dss_unpack_int32(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int32_t i; + uint32_t tmp, *desttmp = (uint32_t*) dest; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_int32 * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); + desttmp[i] = ntohl(tmp); + buffer->unpack_ptr += sizeof(tmp); + } + + return OPAL_SUCCESS; +} + +int opal_dss_unpack_int64(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int32_t i; + uint64_t tmp, *desttmp = (uint64_t*) dest; + + OPAL_OUTPUT( ( opal_dss_verbose, "opal_dss_unpack_int64 * %d\n", (int)*num_vals ) ); + /* check to see if there's enough data in buffer */ + if (opal_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); + desttmp[i] = ntoh64(tmp); + buffer->unpack_ptr += sizeof(tmp); + } + + return OPAL_SUCCESS; +} + +int opal_dss_unpack_string(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + int32_t i, len, n=1; + char **sdest = (char**) dest; + + for (i = 0; i < 
(*num_vals); ++i) { + if (OPAL_SUCCESS != (ret = opal_dss_unpack_int32(buffer, &len, &n, OPAL_INT32))) { + return ret; + } + if (0 == len) { /* zero-length string - unpack the NULL */ + sdest[i] = NULL; + } else { + sdest[i] = (char*)malloc(len); + if (NULL == sdest[i]) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + if (OPAL_SUCCESS != (ret = opal_dss_unpack_byte(buffer, sdest[i], &len, OPAL_BYTE))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} + + +/* UNPACK FUNCTIONS FOR GENERIC OPAL TYPES */ + +/* + * OPAL_DATA_TYPE + */ +int opal_dss_unpack_data_type(opal_buffer_t *buffer, void *dest, int32_t *num_vals, + opal_data_type_t type) +{ + /* turn around and unpack the real type */ + return opal_dss_unpack_buffer(buffer, dest, num_vals, OPAL_DATA_TYPE_T); +} + +/* + * OPAL_DATA_VALUE + */ +int opal_dss_unpack_data_value(opal_buffer_t *buffer, void *dest, int32_t *num, + opal_data_type_t type) +{ + opal_dss_type_info_t *info; + opal_dss_value_t **ddv; + int32_t i, n; + opal_data_type_t dt; + size_t nsize; + int ret; + + ddv = (opal_dss_value_t **) dest; + + for (i = 0; i < *num; ++i) { + /* see what the data type is */ + n = 1; + if (OPAL_SUCCESS != (ret = opal_dss_get_data_type(buffer, &dt))) { + return ret; + } + + /* if it is OPAL_NULL, then do nothing */ + if (OPAL_NULL == dt) continue; + + /* otherwise, allocate the new object and set the type */ + + ddv[i] = OBJ_NEW(opal_dss_value_t); + if (NULL == ddv[i]) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + ddv[i]->type = dt; + + /* if it is UNDEF, then nothing more to do */ + if (OPAL_UNDEF == ddv[i]->type) continue; + + /* get enough memory to hold it */ + if (OPAL_SUCCESS != (ret = opal_dss.size(&nsize, NULL, ddv[i]->type))) { + return ret; + } + ddv[i]->data = (void*)malloc(nsize); + if (NULL == ddv[i]->data) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + /* Lookup the unpack function for this type and call it */ + + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, 
ddv[i]->type))) { + return OPAL_ERR_PACK_FAILURE; + } + + if (info->odti_structured) { + n=1; + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, &(ddv[i]->data), &n, ddv[i]->type))) { + return ret; + } + } else { + n=1; + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, ddv[i]->data, &n, ddv[i]->type))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} + + +/* + * OPAL_BYTE_OBJECT + * + * Unpack *num byte objects from buffer into the array of object + * pointers at dest. Each object, and its payload when the unpacked + * size is positive, is malloc'ed here. + */ +int opal_dss_unpack_byte_object(opal_buffer_t *buffer, void *dest, int32_t *num, + opal_data_type_t type) +{ + int ret; + int32_t i, n, m=1; + opal_byte_object_t **dbyteptr; + + dbyteptr = (opal_byte_object_t**)dest; + n = *num; + for (i = 0; i < n; ++i) { + /* allocate memory for the byte object itself; no payload yet */ + dbyteptr[i] = (opal_byte_object_t*)malloc(sizeof(opal_byte_object_t)); + if (NULL == dbyteptr[i]) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + dbyteptr[i]->bytes = NULL; + + /* unpack the object's size in bytes */ + if (OPAL_SUCCESS != (ret = opal_dss_unpack_int32(buffer, &(dbyteptr[i]->size), &m, OPAL_INT32))) { + return ret; + } + /* only a non-empty object has a payload to allocate and unpack */ + if (0 < dbyteptr[i]->size) { + dbyteptr[i]->bytes = (uint8_t*)malloc(dbyteptr[i]->size); + if (NULL == dbyteptr[i]->bytes) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + if (OPAL_SUCCESS != (ret = opal_dss_unpack_byte(buffer, (dbyteptr[i]->bytes), + &(dbyteptr[i]->size), OPAL_BYTE))) { + return ret; + } + } + } + + return OPAL_SUCCESS; +} diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index be2b69275b..6d85734a34 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -46,7 +46,18 @@ enum { OPAL_ERR_VALUE_OUT_OF_BOUNDS = (OPAL_ERR_BASE - 18), OPAL_ERR_FILE_READ_FAILURE = (OPAL_ERR_BASE - 19), OPAL_ERR_FILE_WRITE_FAILURE = (OPAL_ERR_BASE - 20), - OPAL_ERR_FILE_OPEN_FAILURE = (OPAL_ERR_BASE - 21) + OPAL_ERR_FILE_OPEN_FAILURE = (OPAL_ERR_BASE - 21), + OPAL_ERR_PACK_MISMATCH = (OPAL_ERR_BASE - 22), + OPAL_ERR_PACK_FAILURE = (OPAL_ERR_BASE - 23), + OPAL_ERR_UNPACK_FAILURE = (OPAL_ERR_BASE - 24), + OPAL_ERR_UNPACK_INADEQUATE_SPACE = (OPAL_ERR_BASE - 25), + OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER = (OPAL_ERR_BASE - 26), + OPAL_ERR_TYPE_MISMATCH = (OPAL_ERR_BASE - 27), + OPAL_ERR_OPERATION_UNSUPPORTED = (OPAL_ERR_BASE - 28), + OPAL_ERR_UNKNOWN_DATA_TYPE = (OPAL_ERR_BASE - 29), + OPAL_ERR_BUFFER = (OPAL_ERR_BASE - 30), + 
OPAL_ERR_DATA_TYPE_REDEF = (OPAL_ERR_BASE - 31), + OPAL_ERR_DATA_OVERWRITE_ATTEMPT = (OPAL_ERR_BASE - 32) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/mca/base/base.h b/opal/mca/base/base.h index 5fe72108ed..23b172cc1c 100644 --- a/opal/mca/base/base.h +++ b/opal/mca/base/base.h @@ -108,6 +108,7 @@ OPAL_DECLSPEC int mca_base_component_compare(const mca_base_component_t *a, const mca_base_component_t *b); OPAL_DECLSPEC int mca_base_component_compatible(const mca_base_component_t *a, const mca_base_component_t *b); +OPAL_DECLSPEC char * mca_base_component_to_string(const mca_base_component_t *a); /* mca_base_component_find.c */ diff --git a/opal/mca/base/mca_base_component_compare.c b/opal/mca/base/mca_base_component_compare.c index 34b8a59689..5e89028fdc 100644 --- a/opal/mca/base/mca_base_component_compare.c +++ b/opal/mca/base/mca_base_component_compare.c @@ -136,4 +136,17 @@ int mca_base_component_compatible( return 0; } +/** + * Returns a string which represents the component name and version. 
+ * Has the form: comp_type.comp_name.major_version.minor_version + */ +char * mca_base_component_to_string(const mca_base_component_t *a) { + char * str = NULL; + if(0 > asprintf(&str, "%s.%s.%d.%d", a->mca_type_name, + a->mca_component_name, a->mca_component_major_version, + a->mca_component_minor_version)) { + return NULL; + } + return str; +} diff --git a/opal/runtime/opal_finalize.c b/opal/runtime/opal_finalize.c index 0cb4f6aee7..ae72421a6c 100644 --- a/opal/runtime/opal_finalize.c +++ b/opal/runtime/opal_finalize.c @@ -21,6 +21,7 @@ #include "opal_config.h" #include "opal/class/opal_object.h" +#include "opal/dss/dss.h" #include "opal/util/trace.h" #include "opal/util/output.h" #include "opal/util/malloc.h" @@ -81,6 +82,9 @@ opal_finalize_util(void) the malloc code turning off doesn't affect opal_output that much */ opal_output_finalize(); + + /* close the dss */ + opal_dss_close(); /* finalize the class/object system */ opal_class_finalize(); diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 5db5528879..e814fc11e5 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -36,6 +36,7 @@ #include "opal/mca/paffinity/base/base.h" #include "opal/mca/timer/base/base.h" #include "opal/mca/memchecker/base/base.h" +#include "opal/dss/dss.h" #include "opal/runtime/opal_cr.h" #include "opal/mca/crs/base/base.h" @@ -133,6 +134,39 @@ opal_err2str(int errnum) case OPAL_ERR_FILE_OPEN_FAILURE: retval = "File open failure"; break; + case OPAL_ERR_PACK_MISMATCH: + retval = "Pack data mismatch"; + break; + case OPAL_ERR_PACK_FAILURE: + retval = "Data pack failed"; + break; + case OPAL_ERR_UNPACK_FAILURE: + retval = "Data unpack failed"; + break; + case OPAL_ERR_UNPACK_INADEQUATE_SPACE: + retval = "Data unpack had inadequate space"; + break; + case OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER: + retval = "Data unpack would read past end of buffer"; + break; + case OPAL_ERR_TYPE_MISMATCH: + retval = "Type mismatch"; + break; + case OPAL_ERR_OPERATION_UNSUPPORTED: + retval = "Requested operation is not supported on 
referenced data type"; + break; + case OPAL_ERR_UNKNOWN_DATA_TYPE: + retval = "Unknown data type"; + break; + case OPAL_ERR_BUFFER: + retval = "Buffer error"; + break; + case OPAL_ERR_DATA_TYPE_REDEF: + retval = "Attempt to redefine an existing data type"; + break; + case OPAL_ERR_DATA_OVERWRITE_ATTEMPT: + retval = "Attempt to overwrite a data value"; + break; default: retval = NULL; } @@ -212,6 +243,13 @@ opal_init_util(void) goto return_error; } + /* + * Initialize the data storage service. + */ + if (OPAL_SUCCESS != (ret = opal_dss_open())) { + error = "opal_dss_open"; + goto return_error; + } return OPAL_SUCCESS; return_error: diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am index 2b6cb68274..738879b7f1 100644 --- a/opal/util/Makefile.am +++ b/opal/util/Makefile.am @@ -43,13 +43,14 @@ headers = \ numtostr.h \ num_procs.h \ opal_environ.h \ + opal_getcwd.h \ + opal_pty.h \ os_dirpath.h \ os_path.h \ output.h \ path.h \ pow2.h \ printf.h \ - opal_pty.h \ qsort.h \ show_help.h \ show_help_lex.h \ @@ -75,13 +76,14 @@ libopalutil_la_SOURCES = \ numtostr.c \ num_procs.c \ opal_environ.c \ + opal_getcwd.c \ + opal_pty.c \ os_dirpath.c \ os_path.c \ output.c \ path.c \ pow2.c \ printf.c \ - opal_pty.c \ qsort.c \ show_help.c \ show_help_lex.l \ diff --git a/opal/util/opal_getcwd.c b/opal/util/opal_getcwd.c new file mode 100644 index 0000000000..14f454ec17 --- /dev/null +++ b/opal/util/opal_getcwd.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_STRING_H +#include +#endif + +#include "opal/util/opal_getcwd.h" +#include "opal/constants.h" + + +/* + * Use $PWD instead of getcwd() a) if $PWD exists and b) is a valid + * synonym for the results from getcwd(). 
If both of these conditions + * are not met, just fall back and use the results of getcwd(). + */ +int opal_getcwd(char *buf, size_t size) +{ + char cwd[OMPI_PATH_MAX]; + char *pwd = getenv("PWD"); + struct stat a, b; + + /* Bozo checks (e.g., if someone accidentally passed -1 to the + unsigned "size" param) */ + if (NULL == buf || size > INT_MAX) { + return OPAL_ERR_BAD_PARAM; + } + + /* Call getcwd() to get a baseline result */ + if (NULL == getcwd(cwd, sizeof(cwd))) { + return OPAL_ERR_IN_ERRNO; + } + +#if !defined(HAVE_SYS_STAT_H) + /* If we don't have stat(), then we can't tell if the $PWD and cwd + are the same, so just fall back to getcwd(). */ + pwd = cwd; +#else + if (NULL == pwd) { + pwd = cwd; + } else { + /* If the two are not the same value, figure out if they are + pointing to the same place */ + if (0 != strcmp(pwd, cwd)) { + /* If we can't stat() what getcwd() gave us, give up */ + if (0 != stat(cwd, &a)) { + return OPAL_ERR_IN_ERRNO; + } + /* If we can't stat() $PWD, then $PWD could just be stale + -- so ignore it. */ + else if (0 != stat(pwd, &b)) { + pwd = cwd; + } + /* Otherwise, we successfully stat()'ed them both, so + compare. If either the device or inode is not the + same, then fallback to getcwd(). */ + else { + if (a.st_dev != b.st_dev || a.st_ino != b.st_ino) { + pwd = cwd; + } + } + } + } +#endif + + /* If we got here, pwd is pointing to the result that we want to + give. Ensure the user's buffer can hold it plus the trailing NUL + that strcpy() writes. If it can, copy in the value and be done. */ + if (strlen(pwd) >= size) { + return OPAL_ERR_TEMP_OUT_OF_RESOURCE; + } + strcpy(buf, pwd); + return OPAL_SUCCESS; +} diff --git a/opal/util/opal_getcwd.h b/opal/util/opal_getcwd.h new file mode 100644 index 0000000000..beb0909c56 --- /dev/null +++ b/opal/util/opal_getcwd.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * Per https://svn.open-mpi.org/trac/ompi/ticket/933, use a + * combination of $PWD and getcwd() to find the current working + * directory. + */ + +#ifndef OPAL_GETCWD_H +#define OPAL_GETCWD_H + +BEGIN_C_DECLS + +/** + * Per https://svn.open-mpi.org/trac/ompi/ticket/933, use a + * combination of $PWD and getcwd() to find the current working + * directory. + * + * Use $PWD instead of getcwd() a) if $PWD exists and b) is a valid + * synonym for the results from getcwd(). Unless both of these conditions + * are met, just fall back and use the results of getcwd(). + * + * @param buf Caller-allocated buffer to put the result + * @param size Length of the buf array + * + * @retval OPAL_ERR_OUT_OF_RESOURCE If internal malloc() fails (the implementation in opal_getcwd.c does not call malloc()). + * @retval OPAL_ERR_TEMP_OUT_OF_RESOURCE If the supplied buf buffer + * was not long enough to handle the result. + * @retval OPAL_ERR_BAD_PARAM If buf is NULL or size>INT_MAX + * @retval OPAL_ERR_IN_ERRNO If another error occurred (getcwd() or stat() failed) + * @retval OPAL_SUCCESS If all went well and a valid value was placed + * in the buf buffer. 
+ */ +OPAL_DECLSPEC int opal_getcwd(char *buf, size_t size); + + +END_C_DECLS + +#endif /* OPAL_GETCWD_H */ diff --git a/opal/util/output.c b/opal/util/output.c index c20b539385..85174f730c 100644 --- a/opal/util/output.c +++ b/opal/util/output.c @@ -726,3 +726,12 @@ static void output(int output_id, const char *format, va_list arglist) free(str); } } + +int opal_output_get_verbosity(int output_id) +{ + if (output_id >= 0 && output_id < OPAL_OUTPUT_MAX_STREAMS && info[output_id].ldi_used) { + return info[output_id].ldi_verbose_level; + } else { + return -1; + } +} diff --git a/opal/util/output.h b/opal/util/output.h index 46dddb9c0c..12423715fa 100644 --- a/opal/util/output.h +++ b/opal/util/output.h @@ -73,9 +73,7 @@ #include "opal/class/opal_object.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /** * \class opal_output_stream_t @@ -392,6 +390,14 @@ struct opal_output_stream_t { */ OPAL_DECLSPEC void opal_output_set_verbosity(int output_id, int level); + /** + * Get the verbosity level for a stream + * + * @param output_id Stream id returned from opal_output_open() + * @returns Verbosity of stream + */ + OPAL_DECLSPEC int opal_output_get_verbosity(int output_id); + /** * Set characteristics for output files. * @@ -485,9 +491,7 @@ struct opal_output_stream_t { */ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_output_stream_t); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif /* OPAL_OUTPUT_H_ */ diff --git a/opal/util/printf.h b/opal/util/printf.h index a23cacd669..bca3a58d75 100644 --- a/opal/util/printf.h +++ b/opal/util/printf.h @@ -29,10 +29,7 @@ #include #include -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - +BEGIN_C_DECLS /** * Writes to a string under the control of a format string @@ -127,9 +124,7 @@ OPAL_DECLSPEC int opal_asprintf(char **ptr, const char *fmt, ...) 
__opal_attrib OPAL_DECLSPEC int opal_vasprintf(char **ptr, const char *fmt, va_list ap) __opal_attribute_format__(__printf__, 2, 0); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif /* OPAL_PRINTF_H */ diff --git a/orte/Makefile.am b/orte/Makefile.am index 70017ea238..fd6512a536 100644 --- a/orte/Makefile.am +++ b/orte/Makefile.am @@ -35,7 +35,6 @@ DIST_SUBDIRS = \ lib_LTLIBRARIES = libopen-rte.la libopen_rte_la_SOURCES = libopen_rte_la_LIBADD = \ - dss/libdss.la \ $(MCA_orte_FRAMEWORK_LIBS) \ $(top_ompi_builddir)/opal/libopen-pal.la libopen_rte_la_DEPENDENCIES = $(libopen_rte_la_LIBADD) @@ -58,7 +57,6 @@ ortedir = $(includedir) endif include class/Makefile.am -include dss/Makefile.am include runtime/Makefile.am include util/Makefile.am include tools/Makefile.am diff --git a/orte/class/orte_pointer_array.c b/orte/class/orte_pointer_array.c index 06926fb1ac..275248333e 100644 --- a/orte/class/orte_pointer_array.c +++ b/orte/class/orte_pointer_array.c @@ -17,13 +17,13 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include #include #include #include -#include "orte/orte_constants.h" #include "orte/class/orte_pointer_array.h" #include "opal/util/output.h" diff --git a/orte/class/orte_pointer_array.h b/orte/class/orte_pointer_array.h index 5ad92c1813..e0e16aaa6d 100644 --- a/orte/class/orte_pointer_array.h +++ b/orte/class/orte_pointer_array.h @@ -25,7 +25,7 @@ #define ORTE_POINTER_ARRAY_H #include "orte_config.h" -#include "orte/orte_types.h" +#include "orte/types.h" #if HAVE_STRING_H #include @@ -34,9 +34,7 @@ #include "opal/threads/mutex.h" #include "opal/class/opal_object.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /** * dynamic pointer array @@ -225,7 +223,6 @@ static inline void orte_pointer_array_free_clear(orte_pointer_array_t *array) ORTE_DECLSPEC bool orte_pointer_array_test_and_set_item (orte_pointer_array_t *table, orte_std_cntr_t element_index, void *value); -#if 
defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS + #endif /* OMPI_POINTER_ARRAY_H */ diff --git a/orte/class/orte_proc_table.c b/orte/class/orte_proc_table.c index aa566ab984..c3bdd019af 100644 --- a/orte/class/orte_proc_table.c +++ b/orte/class/orte_proc_table.c @@ -17,15 +17,18 @@ */ #include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" #include #include -#include "orte/orte_constants.h" #include "opal/util/output.h" + +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "orte/class/orte_proc_table.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/ns/ns_types.h" /* * orte_process_name_hash_node_t @@ -45,6 +48,19 @@ static OBJ_CLASS_INSTANCE( NULL, NULL); +typedef struct +{ + opal_list_item_t super; + orte_process_name_t hn_key; + orte_process_name_t hn_value; +} orte_proc_hash_node_name_t; + +static OBJ_CLASS_INSTANCE( + orte_proc_hash_node_name_t, + opal_list_item_t, + NULL, + NULL); + #define GET_KEY(proc) \ ( (((uint32_t) proc->jobid) << 24) + ((uint32_t) proc->vpid) ) @@ -65,7 +81,7 @@ void* orte_hash_table_get_proc(opal_hash_table_t* ht, for(node = (orte_proc_hash_node_t*)opal_list_get_first(list); node != (orte_proc_hash_node_t*)opal_list_get_end(list); node = (orte_proc_hash_node_t*)opal_list_get_next(node)) { - if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { return node->hn_value; } } @@ -92,7 +108,7 @@ int orte_hash_table_set_proc( for(node = (orte_proc_hash_node_t*)opal_list_get_first(list); node != (orte_proc_hash_node_t*)opal_list_get_end(list); node = (orte_proc_hash_node_t*)opal_list_get_next(node)) { - if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { node->hn_value = value; return ORTE_SUCCESS; } @@ -130,7 +146,100 @@ int 
orte_hash_table_remove_proc( for(node = (orte_proc_hash_node_t*)opal_list_get_first(list); node != (orte_proc_hash_node_t*)opal_list_get_end(list); node = (orte_proc_hash_node_t*)opal_list_get_next(node)) { - if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) { + opal_list_remove_item(list, (opal_list_item_t*)node); + opal_list_append(&ht->ht_nodes, (opal_list_item_t*)node); + ht->ht_size--; + return ORTE_SUCCESS; + } + } + return ORTE_ERR_NOT_FOUND; +} + +orte_process_name_t orte_hash_table_get_proc_name(opal_hash_table_t* ht, + const orte_process_name_t* proc, + orte_ns_cmp_bitmask_t mask) +{ + uint32_t key = GET_KEY(proc); + opal_list_t* list = ht->ht_table + (key & ht->ht_mask); + orte_proc_hash_node_name_t *node; + +#if OMPI_ENABLE_DEBUG + if(ht->ht_table_size == 0) { + opal_output(0, "opal_hash_table_get_proc_name:" + "opal_hash_table_init() has not been called"); + return *ORTE_NAME_INVALID; + } +#endif + for(node = (orte_proc_hash_node_name_t*)opal_list_get_first(list); + node != (orte_proc_hash_node_name_t*)opal_list_get_end(list); + node = (orte_proc_hash_node_name_t*)opal_list_get_next(node)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &node->hn_key, proc)) { + return node->hn_value; + } + } + return *ORTE_NAME_INVALID; +} + + +int orte_hash_table_set_proc_name(opal_hash_table_t* ht, + const orte_process_name_t* proc, + const orte_process_name_t *value, + orte_ns_cmp_bitmask_t mask) +{ + uint32_t key = GET_KEY(proc); + opal_list_t* list = ht->ht_table + (key & ht->ht_mask); + orte_proc_hash_node_name_t *node; + +#if OMPI_ENABLE_DEBUG + if(ht->ht_table_size == 0) { + opal_output(0, "opal_hash_table_set_proc_name:" + "opal_hash_table_init() has not been called"); + return ORTE_ERR_BAD_PARAM; + } +#endif + for(node = (orte_proc_hash_node_name_t*)opal_list_get_first(list); + node != (orte_proc_hash_node_name_t*)opal_list_get_end(list); 
+ node = (orte_proc_hash_node_name_t*)opal_list_get_next(node)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &node->hn_key, proc)) { + node->hn_value = *value; + return ORTE_SUCCESS; + } + } + + node = (orte_proc_hash_node_name_t*)opal_list_remove_first(&ht->ht_nodes); + if(NULL == node) { + node = OBJ_NEW(orte_proc_hash_node_name_t); + if(NULL == node) + return ORTE_ERR_OUT_OF_RESOURCE; + } + node->hn_key = *proc; + node->hn_value = *value; + opal_list_append(list, (opal_list_item_t*)node); + ht->ht_size++; + return ORTE_SUCCESS; +} + + +int orte_hash_table_remove_proc_name(opal_hash_table_t* ht, + const orte_process_name_t* proc, + orte_ns_cmp_bitmask_t mask) +{ + uint32_t key = GET_KEY(proc); + opal_list_t* list = ht->ht_table + (key & ht->ht_mask); + orte_proc_hash_node_name_t *node; + +#if OMPI_ENABLE_DEBUG + if(ht->ht_table_size == 0) { + opal_output(0, "opal_hash_table_remove_proc_name:" + "opal_hash_table_init() has not been called"); + return ORTE_ERR_BAD_PARAM; + } +#endif + for(node = (orte_proc_hash_node_name_t*)opal_list_get_first(list); + node != (orte_proc_hash_node_name_t*)opal_list_get_end(list); + node = (orte_proc_hash_node_name_t*)opal_list_get_next(node)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &node->hn_key, proc)) { opal_list_remove_item(list, (opal_list_item_t*)node); opal_list_append(&ht->ht_nodes, (opal_list_item_t*)node); ht->ht_size--; diff --git a/orte/class/orte_proc_table.h b/orte/class/orte_proc_table.h index 4e825984de..55e4996b15 100644 --- a/orte/class/orte_proc_table.h +++ b/orte/class/orte_proc_table.h @@ -25,13 +25,14 @@ #ifndef ORTE_PROC_TABLE_H #define ORTE_PROC_TABLE_H +#include "orte_config.h" +#include "orte/types.h" + #include "opal/class/opal_hash_table.h" -#include "orte/mca/ns/ns_types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +#include "orte/util/name_fns.h" +BEGIN_C_DECLS /** * Retrieve value via orte_process_name_t key. 
@@ -52,7 +53,7 @@ ORTE_DECLSPEC void *orte_hash_table_get_proc( * @param table The input hash table (IN). * @param key The input key (IN). * @param value The value to be associated with the key (IN). - * @return OMPI return code. + * @return ORTE return code. * */ @@ -66,7 +67,7 @@ ORTE_DECLSPEC int orte_hash_table_set_proc( * * @param table The input hash table (IN). * @param key The input key (IN). - * @return OMPI return code. + * @return ORTE return code. * */ @@ -75,9 +76,52 @@ ORTE_DECLSPEC int orte_hash_table_remove_proc( const orte_process_name_t* key); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +/** + * Retrieve process name via orte_process_name_t key. + * + * @param table The input hash table (IN). + * @param key The input key (IN). + * @return The process name associated with the key or + * ORTE_NAME_INVALID if the item is not found. + * + */ + +ORTE_DECLSPEC orte_process_name_t orte_hash_table_get_proc_name( + opal_hash_table_t* table, + const orte_process_name_t* key, + orte_ns_cmp_bitmask_t mask); + +/** + * Set process name based on orte_process_name_t key. + * + * @param table The input hash table (IN). + * @param key The input key (IN). + * @param value The process name to be associated with the key (IN). + * @return ORTE return code. + * + */ + +ORTE_DECLSPEC int orte_hash_table_set_proc_name( + opal_hash_table_t* table, + const orte_process_name_t*, + const orte_process_name_t*, + orte_ns_cmp_bitmask_t mask); + +/** + * Remove process name based on orte_process_name_t key. + * + * @param table The input hash table (IN). + * @param key The input key (IN). + * @return ORTE return code. 
+ * + */ + +ORTE_DECLSPEC int orte_hash_table_remove_proc_name( + opal_hash_table_t* table, + const orte_process_name_t* key, + orte_ns_cmp_bitmask_t mask); + +END_C_DECLS #endif /* OMPI_HASH_TABLE_H */ diff --git a/orte/class/orte_value_array.c b/orte/class/orte_value_array.c index df8531948b..4db3559c38 100644 --- a/orte/class/orte_value_array.c +++ b/orte/class/orte_value_array.c @@ -17,8 +17,8 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" #include "orte/class/orte_value_array.h" diff --git a/orte/class/orte_value_array.h b/orte/class/orte_value_array.h index cb800c9d8e..9edf51435a 100644 --- a/orte/class/orte_value_array.h +++ b/orte/class/orte_value_array.h @@ -19,10 +19,11 @@ #ifndef ORTE_VALUE_ARRAY_H #define ORTE_VALUE_ARRAY_H -#include #include "orte_config.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" +#include "orte/constants.h" +#include "orte/types.h" + +#include #include "opal/class/opal_object.h" #if OMPI_ENABLE_DEBUG @@ -35,9 +36,8 @@ * See ompi_bitmap.h for an explanation of why there is a split * between OMPI and ORTE for this generic class. */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif + +BEGIN_C_DECLS struct orte_value_array_t { @@ -275,9 +275,8 @@ static inline int orte_value_array_remove_item(orte_value_array_t *array, orte_s #define ORTE_VALUE_ARRAY_GET_BASE(array, item_type) \ ((item_type*) ((array)->array_items)) -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS + #endif diff --git a/orte/dss/dss_arith.c b/orte/dss/dss_arith.c deleted file mode 100644 index 741a2be738..0000000000 --- a/orte/dss/dss_arith.c +++ /dev/null @@ -1,856 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/odls/odls_types.h" - -#include "orte/dss/dss_internal.h" - -static void orte_dss_arith_int(int *value, int *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_uint(uint *value, uint *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_size(size_t *value, size_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_pid(pid_t *value, pid_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_byte(uint8_t *value, uint8_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_int8(int8_t *value, int8_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_int16(int16_t *value, int16_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_uint16(uint16_t *value, uint16_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_int32(int32_t *value, int32_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_uint32(uint32_t *value, uint32_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_int64(int64_t *value, int64_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_uint64(uint64_t *value, uint64_t *operand, orte_dss_arith_op_t operation); - -static void orte_dss_arith_std_cntr(orte_std_cntr_t *value, orte_std_cntr_t *operand, orte_dss_arith_op_t operation); - -/* some weird ones - but somebody *might* want to do it, I suppose... 
*/ -static void orte_dss_arith_data_type(orte_data_type_t *value, orte_data_type_t *operand, orte_dss_arith_op_t operation); -static void orte_dss_arith_daemon_cmd(orte_daemon_cmd_flag_t *value, orte_daemon_cmd_flag_t *operand, orte_dss_arith_op_t operation); - -#if OPAL_ENABLE_FT == 1 -static void orte_dss_arith_ckpt_cmd(orte_daemon_cmd_flag_t *value, orte_daemon_cmd_flag_t *operand, orte_dss_arith_op_t operation); -#endif - -int orte_dss_arith(orte_data_value_t *value, orte_data_value_t *operand, orte_dss_arith_op_t operation) -{ - /* check for error */ - if (NULL == value || NULL == operand) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (operand->type != value->type) { - ORTE_ERROR_LOG(ORTE_ERR_TYPE_MISMATCH); - return ORTE_ERR_TYPE_MISMATCH; - } - - /* Lookup the arith function for this type and call it */ - - switch(operand->type) { - case ORTE_INT: - orte_dss_arith_int((int*)value->data, (int*)operand->data, operation); - break; - - case ORTE_UINT: - orte_dss_arith_uint((uint*)value->data, (uint*)operand->data, operation); - break; - - case ORTE_SIZE: - orte_dss_arith_size((size_t*)value->data, (size_t*)operand->data, operation); - break; - - case ORTE_PID: - orte_dss_arith_pid((pid_t*)value->data, (pid_t*)operand->data, operation); - break; - - case ORTE_BYTE: - case ORTE_UINT8: - orte_dss_arith_byte((uint8_t*)value->data, (uint8_t*)operand->data, operation); - break; - - case ORTE_INT8: - orte_dss_arith_int8((int8_t*)value->data, (int8_t*)operand->data, operation); - break; - - case ORTE_INT16: - orte_dss_arith_int16((int16_t*)value->data, (int16_t*)operand->data, operation); - break; - - case ORTE_UINT16: - orte_dss_arith_uint16((uint16_t*)value->data, (uint16_t*)operand->data, operation); - break; - - case ORTE_INT32: - orte_dss_arith_int32((int32_t*)value->data, (int32_t*)operand->data, operation); - break; - - case ORTE_UINT32: - orte_dss_arith_uint32((uint32_t*)value->data, (uint32_t*)operand->data, operation); - break; 
- - case ORTE_INT64: - orte_dss_arith_int64((int64_t*)value->data, (int64_t*)operand->data, operation); - break; - - case ORTE_UINT64: - orte_dss_arith_uint64((uint64_t*)value->data, (uint64_t*)operand->data, operation); - break; - - case ORTE_STD_CNTR: - orte_dss_arith_std_cntr((orte_std_cntr_t*)value->data, (orte_std_cntr_t*)operand->data, operation); - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return ORTE_ERR_OPERATION_UNSUPPORTED; - } - - return ORTE_SUCCESS; -} - -int orte_dss_increment(orte_data_value_t *value) -{ - int one; - unsigned int uone; - size_t sone; - pid_t pone; - uint8_t u8one; - int8_t i8one; - uint16_t u16one; - int16_t i16one; - uint32_t u32one; - int32_t i32one; - uint64_t u64one; - int64_t i64one; - orte_daemon_cmd_flag_t daemoncmdone; - orte_data_type_t datatypeone; - orte_std_cntr_t stdcntrone; -#if OPAL_ENABLE_FT == 1 - orte_daemon_cmd_flag_t ckptcmdone; -#endif - - /* check for error */ - if (NULL == value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - /* Lookup the arith function for this type and call it */ - - switch(value->type) { - case ORTE_INT: - one = 1; - orte_dss_arith_int((int*)value->data, &one, ORTE_DSS_ADD); - break; - - case ORTE_UINT: - uone = 1; - orte_dss_arith_uint((uint*)value->data, &uone, ORTE_DSS_ADD); - break; - - case ORTE_SIZE: - sone = 1; - orte_dss_arith_size((size_t*)value->data, &sone, ORTE_DSS_ADD); - break; - - case ORTE_PID: - pone = 1; - orte_dss_arith_pid((pid_t*)value->data, &pone, ORTE_DSS_ADD); - break; - - case ORTE_BYTE: - case ORTE_UINT8: - u8one = 1; - orte_dss_arith_byte((uint8_t*)value->data, &u8one, ORTE_DSS_ADD); - break; - - case ORTE_INT8: - i8one = 1; - orte_dss_arith_int8((int8_t*)value->data, &i8one, ORTE_DSS_ADD); - break; - - case ORTE_INT16: - i16one = 1; - orte_dss_arith_int16((int16_t*)value->data, &i16one, ORTE_DSS_ADD); - break; - - case ORTE_UINT16: - u16one = 1; - orte_dss_arith_uint16((uint16_t*)value->data, &u16one, 
ORTE_DSS_ADD); - break; - - case ORTE_INT32: - i32one = 1; - orte_dss_arith_int32((int32_t*)value->data, &i32one, ORTE_DSS_ADD); - break; - - case ORTE_UINT32: - u32one = 1; - orte_dss_arith_uint32((uint32_t*)value->data, &u32one, ORTE_DSS_ADD); - break; - - case ORTE_INT64: - i64one = 1; - orte_dss_arith_int64((int64_t*)value->data, &i64one, ORTE_DSS_ADD); - break; - - case ORTE_UINT64: - u64one = 1; - orte_dss_arith_uint64((uint64_t*)value->data, &u64one, ORTE_DSS_ADD); - break; - - case ORTE_DAEMON_CMD: - daemoncmdone = 1; - orte_dss_arith_daemon_cmd((orte_daemon_cmd_flag_t*)value->data, &daemoncmdone, ORTE_DSS_ADD); - break; - -#if OPAL_ENABLE_FT == 1 - case ORTE_CKPT_CMD: - ckptcmdone = 1; - orte_dss_arith_ckpt_cmd(value->data, &ckptcmdone, ORTE_DSS_ADD); - break; -#endif - - case ORTE_DATA_TYPE: - datatypeone = 1; - orte_dss_arith_data_type((orte_data_type_t*)value->data, &datatypeone, ORTE_DSS_ADD); - break; - - case ORTE_STD_CNTR: - stdcntrone = 1; - orte_dss_arith_std_cntr((orte_std_cntr_t*)value->data, &stdcntrone, ORTE_DSS_ADD); - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return ORTE_ERR_OPERATION_UNSUPPORTED; - } - - return ORTE_SUCCESS; -} - -int orte_dss_decrement(orte_data_value_t *value) -{ - int one; - unsigned int uone; - size_t sone; - pid_t pone; - uint8_t u8one; - int8_t i8one; - uint16_t u16one; - int16_t i16one; - uint32_t u32one; - int32_t i32one; - uint64_t u64one; - int64_t i64one; - orte_daemon_cmd_flag_t daemoncmdone; - orte_data_type_t datatypeone; - orte_std_cntr_t stdcntrone; -#if OPAL_ENABLE_FT == 1 - orte_daemon_cmd_flag_t ckptcmdone; -#endif - - /* check for error */ - if (NULL == value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - /* Lookup the arith function for this type and call it */ - - switch(value->type) { - case ORTE_INT: - one = 1; - orte_dss_arith_int((int*)value->data, &one, ORTE_DSS_SUB); - break; - - case ORTE_UINT: - uone = 1; - 
orte_dss_arith_uint((uint*)value->data, &uone, ORTE_DSS_SUB); - break; - - case ORTE_SIZE: - sone = 1; - orte_dss_arith_size((size_t*)value->data, &sone, ORTE_DSS_SUB); - break; - - case ORTE_PID: - pone = 1; - orte_dss_arith_pid((pid_t*)value->data, &pone, ORTE_DSS_SUB); - break; - - case ORTE_BYTE: - case ORTE_UINT8: - u8one = 1; - orte_dss_arith_byte((uint8_t*)value->data, &u8one, ORTE_DSS_SUB); - break; - - case ORTE_INT8: - i8one = 1; - orte_dss_arith_int8((int8_t*)value->data, &i8one, ORTE_DSS_SUB); - break; - - case ORTE_INT16: - i16one = 1; - orte_dss_arith_int16((int16_t*)value->data, &i16one, ORTE_DSS_SUB); - break; - - case ORTE_UINT16: - u16one = 1; - orte_dss_arith_uint16((uint16_t*)value->data, &u16one, ORTE_DSS_SUB); - break; - - case ORTE_INT32: - i32one = 1; - orte_dss_arith_int32((int32_t*)value->data, &i32one, ORTE_DSS_SUB); - break; - - case ORTE_UINT32: - u32one = 1; - orte_dss_arith_uint32((uint32_t*)value->data, &u32one, ORTE_DSS_SUB); - break; - - case ORTE_INT64: - i64one = 1; - orte_dss_arith_int64((int64_t*)value->data, &i64one, ORTE_DSS_SUB); - break; - - case ORTE_UINT64: - u64one = 1; - orte_dss_arith_uint64((uint64_t*)value->data, &u64one, ORTE_DSS_SUB); - break; - - case ORTE_DAEMON_CMD: - daemoncmdone = 1; - orte_dss_arith_daemon_cmd((orte_daemon_cmd_flag_t*)value->data, &daemoncmdone, ORTE_DSS_SUB); - break; - -#if OPAL_ENABLE_FT == 1 - case ORTE_CKPT_CMD: - ckptcmdone = 1; - orte_dss_arith_ckpt_cmd(value->data, &ckptcmdone, ORTE_DSS_SUB); - break; -#endif - - case ORTE_DATA_TYPE: - datatypeone = 1; - orte_dss_arith_data_type((orte_data_type_t*)value->data, &datatypeone, ORTE_DSS_SUB); - break; - - case ORTE_STD_CNTR: - stdcntrone = 1; - orte_dss_arith_std_cntr((orte_std_cntr_t*)value->data, &stdcntrone, ORTE_DSS_SUB); - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return ORTE_ERR_OPERATION_UNSUPPORTED; - } - - return ORTE_SUCCESS; -} - -/* - * NUMERIC arith FUNCTIONS - */ -static void 
orte_dss_arith_int(int *value, int *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_uint(uint *value, uint *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_size(size_t *value, size_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_pid(pid_t *value, pid_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - 
ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_byte(uint8_t *value, uint8_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_int8(int8_t *value, int8_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_int16(int16_t *value, int16_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_uint16(uint16_t *value, uint16_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - 
ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_int32(int32_t *value, int32_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_uint32(uint32_t *value, uint32_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_int64(int64_t *value, int64_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_uint64(uint64_t *value, uint64_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - 
- case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_std_cntr(orte_std_cntr_t *value, orte_std_cntr_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_data_type(orte_data_type_t *value, orte_data_type_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -static void orte_dss_arith_daemon_cmd(orte_daemon_cmd_flag_t *value, orte_daemon_cmd_flag_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - break; - - case ORTE_DSS_SUB: - (*value) -= *operand; - break; - - case ORTE_DSS_MUL: - (*value) *= *operand; - break; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - break; - } - return; -} - -#if OPAL_ENABLE_FT == 1 -static void 
orte_dss_arith_ckpt_cmd(orte_daemon_cmd_flag_t *value, orte_daemon_cmd_flag_t *operand, orte_dss_arith_op_t operation) -{ - switch(operation) { - case ORTE_DSS_ADD: - (*value) += *operand; - return; - - case ORTE_DSS_SUB: - (*value) -= *operand; - return; - - case ORTE_DSS_MUL: - (*value) *= *operand; - return; - - case ORTE_DSS_DIV: - if (0 == *operand) { - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - (*value) /= *operand; - return; - - default: - ORTE_ERROR_LOG(ORTE_ERR_OPERATION_UNSUPPORTED); - return; - } - return; -} -#endif diff --git a/orte/dss/dss_compare.c b/orte/dss/dss_compare.c deleted file mode 100644 index 005a2c88c2..0000000000 --- a/orte/dss/dss_compare.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" -#include "opal/util/output.h" - -int orte_dss_compare(void *value1, void *value2, orte_data_type_t type) -{ - orte_dss_type_info_t *info; - - /* check for error */ - if (NULL == value1 || NULL == value2) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* Lookup the compare function for this type and call it */ - - if (!(type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - return info->odti_compare_fn(value1, value2, type); -} - -/* - * NUMERIC COMPARE FUNCTIONS - */ -int orte_dss_compare_int(int *value1, int *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint(unsigned int *value1, unsigned int *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_size(size_t *value1, size_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_pid(pid_t *value1, pid_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_byte(char *value1, char *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - 
return ORTE_EQUAL; -} - -int orte_dss_compare_char(char *value1, char *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_int8(int8_t *value1, int8_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint8(uint8_t *value1, uint8_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_int16(int16_t *value1, int16_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint16(uint16_t *value1, uint16_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_int32(int32_t *value1, int32_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint32(uint32_t *value1, uint32_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_int64(int64_t *value1, int64_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_dss_compare_uint64(uint64_t *value1, uint64_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return 
ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -/* - * NON-NUMERIC SYSTEM TYPES - */ - -/* NULL */ -int orte_dss_compare_null(char *value1, char *value2, orte_data_type_t type) -{ - return ORTE_EQUAL; -} - -/* BOOL */ -int orte_dss_compare_bool(bool *value1, bool *value2, orte_data_type_t type) -{ - if (*value1 && !(*value2)) return ORTE_VALUE1_GREATER; - - if (*value2 && !(*value1)) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; - -} - -/* STRING */ -int orte_dss_compare_string(char *value1, char *value2, orte_data_type_t type) -{ - if (0 < strcmp(value1, value2)) return ORTE_VALUE2_GREATER; - - if (0 > strcmp(value1, value2)) return ORTE_VALUE1_GREATER; - - return ORTE_EQUAL; -} - -/* COMPARE FUNCTIONS FOR GENERIC ORTE TYPES */ - -/* ORTE_STD_CNTR */ -int orte_dss_compare_std_cntr(orte_std_cntr_t *value1, orte_std_cntr_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -/* ORTE_DATA_TYPE */ -int orte_dss_compare_dt(orte_data_type_t *value1, orte_data_type_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -#if OPAL_ENABLE_FT == 1 -/* ORTE_CKPT_CMD */ -int orte_dss_compare_ckpt_cmd(size_t *value1, size_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} -#endif - -/* ORTE_DATA_VALUE */ -int orte_dss_compare_data_value(orte_data_value_t *value1, orte_data_value_t *value2, orte_data_type_t type) -{ - /* can't compare if the two types don't match */ - if (value1->type != value2->type) { - ORTE_ERROR_LOG(ORTE_ERR_TYPE_MISMATCH); - return ORTE_ERR_TYPE_MISMATCH; - } - - /* okay, go ahead and compare the values themselves */ - return orte_dss.compare(value1->data, value2->data, value1->type); -} - -/* 
ORTE_BYTE_OBJECT */ -int orte_dss_compare_byte_object(orte_byte_object_t *value1, orte_byte_object_t *value2, orte_data_type_t type) -{ - int checksum, diff; - orte_std_cntr_t i; - - /* compare the sizes first - bigger size object is "greater than" */ - if (value1->size > value2->size) return ORTE_VALUE1_GREATER; - - if (value2->size > value1->size) return ORTE_VALUE2_GREATER; - - /* get here if the two sizes are identical - now do a simple checksum-style - * calculation to determine "biggest" - */ - checksum = 0; - - for (i=0; i < value1->size; i++) { - /* protect against overflows */ - diff = value1->bytes[i] - value2->bytes[i]; - if (INT_MAX-abs(checksum)-abs(diff) < 0) { /* got an overflow condition */ - checksum = 0; - } - checksum += diff; - } - - if (0 > checksum) return ORTE_VALUE2_GREATER; /* sum of value2 bytes was greater */ - - if (0 < checksum) return ORTE_VALUE1_GREATER; /* of value1 bytes was greater */ - - return ORTE_EQUAL; /* sum of both value's bytes was identical */ -} diff --git a/orte/dss/dss_copy.c b/orte/dss/dss_copy.c deleted file mode 100644 index 340f82334f..0000000000 --- a/orte/dss/dss_copy.c +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" -#include "opal/util/output.h" - -int orte_dss_copy(void **dest, void *src, orte_data_type_t type) -{ - int rc; - orte_dss_type_info_t *info; - - /* check for error */ - if (NULL == dest) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (NULL == src && (ORTE_NULL != type && ORTE_STRING != type)) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* Lookup the copy function for this type and call it */ - - if (!(type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - if (ORTE_SUCCESS != (rc = info->odti_copy_fn(dest, src, type))) { - ORTE_ERROR_LOG(rc); - } - - - return rc; -} - -/* - * STANDARD COPY FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ -int orte_dss_std_copy(void **dest, void *src, orte_data_type_t type) -{ - size_t datasize; - uint8_t *val = NULL; - - switch(type) { - case ORTE_BOOL: - datasize = sizeof(bool); - break; - - case ORTE_INT: - case ORTE_UINT: - datasize = sizeof(int); - break; - - case ORTE_SIZE: - datasize = sizeof(size_t); - break; - - case ORTE_PID: - datasize = sizeof(pid_t); - break; - - case ORTE_BYTE: - case ORTE_INT8: - case ORTE_UINT8: - datasize = 1; - break; - - case ORTE_INT16: - case ORTE_UINT16: - datasize = 2; - break; - - case ORTE_INT32: - case ORTE_UINT32: - datasize = 4; - break; - - case ORTE_INT64: - case ORTE_UINT64: - datasize = 8; - break; - - case ORTE_STD_CNTR: - datasize = sizeof(orte_std_cntr_t); - break; - - case ORTE_DATA_TYPE: - datasize = sizeof(orte_data_type_t); - break; - -#if OPAL_ENABLE_FT == 1 - case ORTE_CKPT_CMD: - datasize = sizeof(size_t); - break; 
-#endif - - default: - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - val = (uint8_t*)malloc(datasize); - if (NULL == val) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - memcpy(val, src, datasize); - *dest = val; - - return ORTE_SUCCESS; -} - -/* COPY FUNCTIONS FOR NON-STANDARD SYSTEM TYPES */ - -/* - * NULL - */ -int orte_dss_copy_null(char **dest, char *src, orte_data_type_t type) -{ - char *val; - - *dest = (char*)malloc(sizeof(char*)); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - val = *dest; /* save the address of the value */ - - /* set the dest to null */ - *val = 0x00; - - return ORTE_SUCCESS; -} - -/* - * STRING - */ -int orte_dss_copy_string(char **dest, char *src, orte_data_type_t type) -{ - if (NULL == src) { /* got zero-length string/NULL pointer - store NULL */ - *dest = NULL; - } else { - *dest = strdup(src); - } - - return ORTE_SUCCESS; -} - -/* COPY FUNCTIONS FOR GENERIC ORTE TYPES */ - -/* - * ORTE_DATA_VALUE - */ -int orte_dss_copy_data_value(orte_data_value_t **dest, orte_data_value_t *src, - orte_data_type_t type) -{ - int rc; - - /* create the new object */ - *dest = OBJ_NEW(orte_data_value_t); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - (*dest)->type = src->type; - - /* copy the payload with its associated copy function */ - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((*dest)->data), src->data, src->type))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*dest); - return rc; - } - - return ORTE_SUCCESS; -} - - -/* - * ORTE_BYTE_OBJECT - */ -int orte_dss_copy_byte_object(orte_byte_object_t **dest, orte_byte_object_t *src, - orte_data_type_t type) -{ - /* allocate space for the new object */ - *dest = (orte_byte_object_t*)malloc(sizeof(orte_byte_object_t)); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return 
ORTE_ERR_OUT_OF_RESOURCE; - } - - (*dest)->size = src->size; - - /* allocate the required space for the bytes */ - (*dest)->bytes = (uint8_t*)malloc(src->size); - if (NULL == (*dest)->bytes) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(*dest); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* copy the data across */ - memcpy((*dest)->bytes, src->bytes, src->size); - - return ORTE_SUCCESS; -} diff --git a/orte/dss/dss_internal.h b/orte/dss/dss_internal.h deleted file mode 100644 index f218411d41..0000000000 --- a/orte/dss/dss_internal.h +++ /dev/null @@ -1,507 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef ORTE_DSS_INTERNAL_H_ -#define ORTE_DSS_INTERNAL_H_ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/class/orte_pointer_array.h" - -#include "orte/dss/dss.h" - -#if HAVE_STRING_H -# if !defined(STDC_HEADERS) && HAVE_MEMORY_H -# include -# endif -# include -#endif - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * The default starting chunk size - */ -#define ORTE_DSS_DEFAULT_INITIAL_SIZE 128 -/* - * The default threshold size when we switch from doubling the - * buffer size to addatively increasing it - */ -#define ORTE_DSS_DEFAULT_THRESHOLD_SIZE 1024 - -/* - * Internal type corresponding to size_t. Do not use this in - * interface calls - use ORTE_SIZE instead. 
- */ -#if SIZEOF_SIZE_T == 1 -#define DSS_TYPE_SIZE_T ORTE_UINT8 -#elif SIZEOF_SIZE_T == 2 -#define DSS_TYPE_SIZE_T ORTE_UINT16 -#elif SIZEOF_SIZE_T == 4 -#define DSS_TYPE_SIZE_T ORTE_UINT32 -#elif SIZEOF_SIZE_T == 8 -#define DSS_TYPE_SIZE_T ORTE_UINT64 -#else -#error Unsupported size_t size! -#endif - -/* - * Internal type corresponding to bool. Do not use this in interface - * calls - use ORTE_BOOL instead. - */ -#if SIZEOF_BOOL == 1 -#define DSS_TYPE_BOOL ORTE_UINT8 -#elif SIZEOF_BOOL == 2 -#define DSS_TYPE_BOOL ORTE_UINT16 -#elif SIZEOF_BOOL == 4 -#define DSS_TYPE_BOOL ORTE_UINT32 -#elif SIZEOF_BOOL == 8 -#define DSS_TYPE_BOOL ORTE_UINT64 -#else -#error Unsupported bool size! -#endif - -/* - * Internal type corresponding to int and unsigned int. Do not use - * this in interface calls - use ORTE_INT / ORTE_UINT instead. - */ -#if SIZEOF_INT == 1 -#define DSS_TYPE_INT ORTE_INT8 -#define DSS_TYPE_UINT ORTE_UINT8 -#elif SIZEOF_INT == 2 -#define DSS_TYPE_INT ORTE_INT16 -#define DSS_TYPE_UINT ORTE_UINT16 -#elif SIZEOF_INT == 4 -#define DSS_TYPE_INT ORTE_INT32 -#define DSS_TYPE_UINT ORTE_UINT32 -#elif SIZEOF_INT == 8 -#define DSS_TYPE_INT ORTE_INT64 -#define DSS_TYPE_UINT ORTE_UINT64 -#else -#error Unsupported int size! -#endif - -/* - * Internal type corresponding to pid_t. Do not use this in interface - * calls - use ORTE_PID instead. - */ -#if SIZEOF_PID_T == 1 -#define DSS_TYPE_PID_T ORTE_UINT8 -#elif SIZEOF_PID_T == 2 -#define DSS_TYPE_PID_T ORTE_UINT16 -#elif SIZEOF_PID_T == 4 -#define DSS_TYPE_PID_T ORTE_UINT32 -#elif SIZEOF_PID_T == 8 -#define DSS_TYPE_PID_T ORTE_UINT64 -#else -#error Unsupported pid_t size! 
-#endif - -/* Unpack generic size macros */ -#define UNPACK_SIZE_MISMATCH(unpack_type, remote_type, ret) \ -do { \ - switch(remote_type) { \ - case ORTE_UINT8: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint8_t, remote_type); \ - break; \ - case ORTE_INT8: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int8_t, remote_type); \ - break; \ - case ORTE_UINT16: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint16_t, remote_type); \ - break; \ - case ORTE_INT16: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int16_t, remote_type); \ - break; \ - case ORTE_UINT32: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint32_t, remote_type); \ - break; \ - case ORTE_INT32: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int32_t, remote_type); \ - break; \ - case ORTE_UINT64: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint64_t, remote_type); \ - break; \ - case ORTE_INT64: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int64_t, remote_type); \ - break; \ - default: \ - ret = ORTE_ERR_NOT_FOUND; \ - ORTE_ERROR_LOG(ret); \ - } \ -} while (0) - -/* NOTE: do not need to deal with endianness here, as the unpacking of -the underling sender-side type will do that for us. Repeat: the -data in tmpbuf[] is already in host byte order. 
*/ -#define UNPACK_SIZE_MISMATCH_FOUND(unpack_type, tmptype, tmpdsstype) \ -do { \ - orte_std_cntr_t i; \ - tmptype *tmpbuf = (tmptype*)malloc(sizeof(tmptype) * (*num_vals)); \ - ret = orte_dss_unpack_buffer(buffer, tmpbuf, num_vals, tmpdsstype); \ - for (i = 0 ; i < *num_vals ; ++i) { \ - ((unpack_type*) dest)[i] = (unpack_type)(tmpbuf[i]); \ - } \ - free(tmpbuf); \ -} while (0) - - -/** - * Internal struct used for holding registered dss functions - */ -struct orte_dss_type_info_t { - opal_object_t super; - /* type identifier */ - orte_data_type_t odti_type; - /** Debugging string name */ - char *odti_name; - /** Pack function */ - orte_dss_pack_fn_t odti_pack_fn; - /** Unpack function */ - orte_dss_unpack_fn_t odti_unpack_fn; - /** copy function */ - orte_dss_copy_fn_t odti_copy_fn; - /** compare function */ - orte_dss_compare_fn_t odti_compare_fn; - /** size function */ - orte_dss_size_fn_t odti_size_fn; - /** print function */ - orte_dss_print_fn_t odti_print_fn; - /** Release function */ - orte_dss_release_fn_t odti_release_fn; - /** flag to indicate structured data */ - bool odti_structured; -}; -/** - * Convenience typedef - */ -typedef struct orte_dss_type_info_t orte_dss_type_info_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_dss_type_info_t); - -/* - * globals needed within dss - */ -extern bool orte_dss_initialized; -extern bool orte_dss_debug; -extern int orte_dss_verbose; -extern int orte_dss_initial_size; -extern int orte_dss_threshold_size; -extern orte_pointer_array_t *orte_dss_types; -extern orte_data_type_t orte_dss_num_reg_types; - - /* - * Implementations of API functions - */ - - int orte_dss_set(orte_data_value_t *value, void *new_value, orte_data_type_t type); - - int orte_dss_get(void **data, orte_data_value_t *value, orte_data_type_t type); - - int orte_dss_arith(orte_data_value_t *value, orte_data_value_t *operand, orte_dss_arith_op_t operation); - - int orte_dss_increment(orte_data_value_t *value); - - int 
orte_dss_decrement(orte_data_value_t *value); - - int orte_dss_set_buffer_type(orte_buffer_t *buffer, orte_dss_buffer_type_t type); - - int orte_dss_pack(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, - orte_data_type_t type); - int orte_dss_unpack(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *max_num_vals, - orte_data_type_t type); - - int orte_dss_copy(void **dest, void *src, orte_data_type_t type); - - int orte_dss_compare(void *value1, void *value2, - orte_data_type_t type); - - int orte_dss_print(char **output, char *prefix, void *src, orte_data_type_t type); - - int orte_dss_dump(int output_stream, void *src, orte_data_type_t type); - - int orte_dss_size(size_t *size, void *src, orte_data_type_t type); - - int orte_dss_peek(orte_buffer_t *buffer, orte_data_type_t *type, - orte_std_cntr_t *number); - - int orte_dss_peek_type(orte_buffer_t *buffer, orte_data_type_t *type); - - int orte_dss_unload(orte_buffer_t *buffer, void **payload, - orte_std_cntr_t *bytes_used); - int orte_dss_load(orte_buffer_t *buffer, void *payload, orte_std_cntr_t bytes_used); - - int orte_dss_xfer_payload(orte_buffer_t *dest, orte_buffer_t *src); - - int orte_dss_copy_payload(orte_buffer_t *dest, orte_buffer_t *src); - - int orte_dss_register(orte_dss_pack_fn_t pack_fn, - orte_dss_unpack_fn_t unpack_fn, - orte_dss_copy_fn_t copy_fn, - orte_dss_compare_fn_t compare_fn, - orte_dss_size_fn_t size_fn, - orte_dss_print_fn_t print_fn, - orte_dss_release_fn_t release_fn, - bool structured, - const char *name, orte_data_type_t *type); - - void orte_dss_release(orte_data_value_t *value); - - char *orte_dss_lookup_data_type(orte_data_type_t type); - - void orte_dss_dump_data_types(int output); - - /* - * Non-API functions - */ - int orte_dss_pack_buffer(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - orte_data_type_t type); - - int orte_dss_unpack_buffer(orte_buffer_t *buffer, void *dst, orte_std_cntr_t *num_vals, - orte_data_type_t type); - - /* 
- * Internal pack functions - */ - - int orte_dss_pack_null(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_byte(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_bool(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_int(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_int16(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_int32(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_int64(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_sizet(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_pid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_string(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_std_cntr(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num, - orte_data_type_t type); - - int orte_dss_pack_data_type(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -#if OPAL_ENABLE_FT == 1 - int orte_dss_pack_ckpt_cmd(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); -#endif - - int orte_dss_pack_data_value(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - int orte_dss_pack_byte_object(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - - /* - * Internal unpack functions - */ - - int orte_dss_unpack_null(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t 
*num_vals, orte_data_type_t type); - int orte_dss_unpack_byte(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_bool(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_int(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - int orte_dss_unpack_int16(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - int orte_dss_unpack_int32(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - int orte_dss_unpack_int64(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_sizet(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_pid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_string(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_std_cntr(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num, - orte_data_type_t type); - - int orte_dss_unpack_data_type(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -#if OPAL_ENABLE_FT == 1 - int orte_dss_unpack_ckpt_cmd(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); -#endif - - int orte_dss_unpack_data_value(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - int orte_dss_unpack_byte_object(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - /* - * Internal copy functions - */ - - int orte_dss_std_copy(void **dest, void *src, orte_data_type_t type); - - int orte_dss_copy_null(char **dest, char *src, orte_data_type_t type); - - int orte_dss_copy_string(char **dest, char *src, orte_data_type_t type); - - int 
orte_dss_copy_byte_object(orte_byte_object_t **dest, orte_byte_object_t *src, - orte_data_type_t type); - - int orte_dss_copy_data_value(orte_data_value_t **dest, orte_data_value_t *src, - orte_data_type_t type); - /* - * Internal compare functions - */ - - int orte_dss_compare_bool(bool *value1, bool *value2, orte_data_type_t type); - - int orte_dss_compare_int(int *value1, int *value2, orte_data_type_t type); - int orte_dss_compare_uint(unsigned int *value1, unsigned int *value2, orte_data_type_t type); - - int orte_dss_compare_size(size_t *value1, size_t *value2, orte_data_type_t type); - - int orte_dss_compare_pid(pid_t *value1, pid_t *value2, orte_data_type_t type); - - int orte_dss_compare_byte(char *value1, char *value2, orte_data_type_t type); - int orte_dss_compare_char(char *value1, char *value2, orte_data_type_t type); - int orte_dss_compare_int8(int8_t *value1, int8_t *value2, orte_data_type_t type); - int orte_dss_compare_uint8(uint8_t *value1, uint8_t *value2, orte_data_type_t type); - - int orte_dss_compare_int16(int16_t *value1, int16_t *value2, orte_data_type_t type); - int orte_dss_compare_uint16(uint16_t *value1, uint16_t *value2, orte_data_type_t type); - - int orte_dss_compare_int32(int32_t *value1, int32_t *value2, orte_data_type_t type); - int orte_dss_compare_uint32(uint32_t *value1, uint32_t *value2, orte_data_type_t type); - - int orte_dss_compare_int64(int64_t *value1, int64_t *value2, orte_data_type_t type); - int orte_dss_compare_uint64(uint64_t *value1, uint64_t *value2, orte_data_type_t type); - - int orte_dss_compare_null(char *value1, char *value2, orte_data_type_t type); - - int orte_dss_compare_string(char *value1, char *value2, orte_data_type_t type); - - int orte_dss_compare_std_cntr(orte_std_cntr_t *value1, orte_std_cntr_t *value2, orte_data_type_t type); - - int orte_dss_compare_dt(orte_data_type_t *value1, orte_data_type_t *value2, orte_data_type_t type); - -#if OPAL_ENABLE_FT == 1 - int orte_dss_compare_ckpt_cmd(size_t 
*value1, size_t *value2, orte_data_type_t type); -#endif - - int orte_dss_compare_data_value(orte_data_value_t *value1, orte_data_value_t *value2, orte_data_type_t type); - - int orte_dss_compare_byte_object(orte_byte_object_t *value1, orte_byte_object_t *value2, orte_data_type_t type); - - /* - * Internal size functions - */ - int orte_dss_std_size(size_t *size, void *src, orte_data_type_t type); - - int orte_dss_size_string(size_t *size, char *src, orte_data_type_t type); - - int orte_dss_size_data_value(size_t *size, orte_data_value_t *src, orte_data_type_t type); - - int orte_dss_size_byte_object(size_t *size, orte_byte_object_t *src, orte_data_type_t type); - - /* - * Internal print functions - */ - int orte_dss_print_byte(char **output, char *prefix, uint8_t *src, orte_data_type_t type); - - int orte_dss_print_string(char **output, char *prefix, char *src, orte_data_type_t type); - - int orte_dss_print_size(char **output, char *prefix, size_t *src, orte_data_type_t type); - int orte_dss_print_pid(char **output, char *prefix, pid_t *src, orte_data_type_t type); - int orte_dss_print_bool(char **output, char *prefix, bool *src, orte_data_type_t type); - int orte_dss_print_int(char **output, char *prefix, int *src, orte_data_type_t type); - int orte_dss_print_uint(char **output, char *prefix, int *src, orte_data_type_t type); - int orte_dss_print_uint8(char **output, char *prefix, uint8_t *src, orte_data_type_t type); - int orte_dss_print_uint16(char **output, char *prefix, uint16_t *src, orte_data_type_t type); - int orte_dss_print_uint32(char **output, char *prefix, uint32_t *src, orte_data_type_t type); - int orte_dss_print_int8(char **output, char *prefix, int8_t *src, orte_data_type_t type); - int orte_dss_print_int16(char **output, char *prefix, int16_t *src, orte_data_type_t type); - int orte_dss_print_int32(char **output, char *prefix, int32_t *src, orte_data_type_t type); -#ifdef HAVE_INT64_T - int orte_dss_print_uint64(char **output, char *prefix, 
uint64_t *src, orte_data_type_t type); - int orte_dss_print_int64(char **output, char *prefix, int64_t *src, orte_data_type_t type); -#else - int orte_dss_print_uint64(char **output, char *prefix, void *src, orte_data_type_t type); - int orte_dss_print_int64(char **output, char *prefix, void *src, orte_data_type_t type); -#endif - int orte_dss_print_null(char **output, char *prefix, void *src, orte_data_type_t type); - int orte_dss_print_std_cntr(char **output, char *prefix, orte_std_cntr_t *src, orte_data_type_t type); - int orte_dss_print_data_type(char **output, char *prefix, orte_data_type_t *src, orte_data_type_t type); -#if OPAL_ENABLE_FT == 1 - int orte_dss_print_ckpt_cmd(char **output, char *prefix, size_t *src, orte_data_type_t type); -#endif - int orte_dss_print_data_value(char **output, char *prefix, orte_data_value_t *src, orte_data_type_t type); - int orte_dss_print_byte_object(char **output, char *prefix, orte_byte_object_t *src, orte_data_type_t type); - - - /* - * Internal release functions - */ - void orte_dss_std_release(orte_data_value_t *value); - - void orte_dss_std_obj_release(orte_data_value_t *value); - - void orte_dss_release_byte_object(orte_data_value_t *value); - - /* - * Internal helper functions - */ - - char* orte_dss_buffer_extend(orte_buffer_t *bptr, size_t bytes_to_add); - - bool orte_dss_too_small(orte_buffer_t *buffer, size_t bytes_reqd); - - orte_dss_type_info_t* orte_dss_find_type(orte_data_type_t type); - - int orte_dss_store_data_type(orte_buffer_t *buffer, orte_data_type_t type); - - int orte_dss_get_data_type(orte_buffer_t *buffer, orte_data_type_t *type); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/dss/dss_open_close.c b/orte/dss/dss_open_close.c deleted file mode 100644 index eb785efa08..0000000000 --- a/orte/dss/dss_open_close.c +++ /dev/null @@ -1,503 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - 
* Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "opal/mca/base/mca_base_param.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" - -#include "orte/dss/dss_internal.h" - -/** - * globals - */ -bool orte_dss_initialized = false; -bool orte_dss_debug = false; -int orte_dss_verbose = -1; /* by default disabled */ -int orte_dss_initial_size; -int orte_dss_threshold_size; -orte_pointer_array_t *orte_dss_types; -orte_data_type_t orte_dss_num_reg_types; -orte_dss_buffer_type_t default_buf_type; - -orte_dss_t orte_dss = { - orte_dss_set, - orte_dss_get, - orte_dss_arith, - orte_dss_increment, - orte_dss_decrement, - orte_dss_set_buffer_type, - orte_dss_pack, - orte_dss_unpack, - orte_dss_copy, - orte_dss_compare, - orte_dss_size, - orte_dss_print, - orte_dss_release, - orte_dss_peek, - orte_dss_unload, - orte_dss_load, - orte_dss_xfer_payload, - orte_dss_copy_payload, - orte_dss_register, - orte_dss_lookup_data_type, - orte_dss_dump_data_types, - orte_dss_dump -}; - -/** - * Object constructors, destructors, and instantiations - */ -/** Data Value **/ -/* constructor - used to initialize state of data value instance */ -static void orte_data_value_construct(orte_data_value_t* ptr) -{ - ptr->type = ORTE_UNDEF; - ptr->data = NULL; -} -/* destructor - used to release data value instance */ -static void orte_data_value_destruct(orte_data_value_t* ptr) -{ - if (NULL != ptr->data) { - orte_dss.release(ptr); - } -} - -/* define instance of opal_class_t */ 
-OBJ_CLASS_INSTANCE( - orte_data_value_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_data_value_construct, /* constructor */ - orte_data_value_destruct); /* destructor */ - - -static void orte_buffer_construct (orte_buffer_t* buffer) -{ - /** set the default buffer type */ - buffer->type = default_buf_type; - - /* Make everything NULL to begin with */ - - buffer->base_ptr = buffer->pack_ptr = buffer->unpack_ptr = NULL; - buffer->bytes_allocated = buffer->bytes_used = 0; -} - -static void orte_buffer_destruct (orte_buffer_t* buffer) -{ - if (NULL != buffer) { - if (NULL != buffer->base_ptr) { - free (buffer->base_ptr); - } - } -} - -OBJ_CLASS_INSTANCE(orte_buffer_t, - opal_object_t, - orte_buffer_construct, - orte_buffer_destruct); - - -static void orte_dss_type_info_construct(orte_dss_type_info_t *obj) -{ - obj->odti_name = NULL; - obj->odti_pack_fn = NULL; - obj->odti_unpack_fn = NULL; - obj->odti_copy_fn = NULL; - obj->odti_compare_fn = NULL; - obj->odti_size_fn = NULL; - obj->odti_print_fn = NULL; - obj->odti_release_fn = NULL; - obj->odti_structured = false; -} - -static void orte_dss_type_info_destruct(orte_dss_type_info_t *obj) -{ - if (NULL != obj->odti_name) { - free(obj->odti_name); - } -} - -OBJ_CLASS_INSTANCE(orte_dss_type_info_t, opal_object_t, - orte_dss_type_info_construct, - orte_dss_type_info_destruct); - - -int orte_dss_open(void) -{ - char *enviro_val; - int id, rc; - orte_data_type_t tmp; - int def_type; - - if (orte_dss_initialized) { - return ORTE_SUCCESS; - } - - enviro_val = getenv("ORTE_dss_debug"); - if (NULL != enviro_val) { /* debug requested */ - orte_dss_debug = true; - } else { - orte_dss_debug = false; - } - - /** set the default buffer type. If we are in debug mode, then we default - * to fully described buffers. 
Otherwise, we default to non-described for brevity - * and performance - */ -#if OMPI_ENABLE_DEBUG - def_type = ORTE_DSS_BUFFER_FULLY_DESC; -#else - def_type = ORTE_DSS_BUFFER_NON_DESC; -#endif - - id = mca_base_param_register_int("dss", "buffer", "type", - "Set the default mode for OpenRTE buffers (0=non-described, 1=described)", - def_type); - mca_base_param_lookup_int(id, &rc); - default_buf_type = rc; - - /* setup the initial size of the buffer. */ - id = mca_base_param_register_int("dss", "buffer_initial", "size", NULL, - ORTE_DSS_DEFAULT_INITIAL_SIZE); - mca_base_param_lookup_int(id, &orte_dss_initial_size); - - /* the threshold as to where to stop doubling the size of the buffer - * allocated memory and start doing additive increases */ - id = mca_base_param_register_int("dss", "buffer_threshold", "size", NULL, - ORTE_DSS_DEFAULT_THRESHOLD_SIZE); - mca_base_param_lookup_int(id, &orte_dss_threshold_size); - - /* Setup the types array */ - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&orte_dss_types, - ORTE_DSS_ID_DYNAMIC, - ORTE_DSS_ID_MAX, - ORTE_DSS_ID_MAX))) { - ORTE_ERROR_LOG(rc); - return rc; - } - orte_dss_num_reg_types = 0; - - /* Register all the intrinsic types */ - - tmp = ORTE_NULL; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_null, - orte_dss_unpack_null, - (orte_dss_copy_fn_t)orte_dss_copy_null, - (orte_dss_compare_fn_t)orte_dss_compare_null, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_null, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_NULL", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_BYTE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_byte, - orte_dss_unpack_byte, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_byte, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_byte, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - 
"ORTE_BYTE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_BOOL; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_bool, - orte_dss_unpack_bool, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_bool, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_bool, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_BOOL", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int, - orte_dss_unpack_int, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int, - orte_dss_unpack_int, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_uint, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT8; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_byte, - orte_dss_unpack_byte, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int8, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int8, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT8", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT8; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_byte, - orte_dss_unpack_byte, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint8, - (orte_dss_size_fn_t)orte_dss_std_size, - 
(orte_dss_print_fn_t)orte_dss_print_uint8, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT8", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT16; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int16, - orte_dss_unpack_int16, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int16, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int16, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT16", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT16; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int16, - orte_dss_unpack_int16, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint16, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_uint16, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT16", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT32; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int32, - orte_dss_unpack_int32, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int32, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int32, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT32", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT32; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int32, - orte_dss_unpack_int32, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint32, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_uint32, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT32", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_INT64; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int64, 
- orte_dss_unpack_int64, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_int64, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_int64, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_INT64", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_UINT64; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_int64, - orte_dss_unpack_int64, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_uint64, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_uint64, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_UINT64", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_SIZE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_sizet, - orte_dss_unpack_sizet, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_size, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_size, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_SIZE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_PID; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_pid, - orte_dss_unpack_pid, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_pid, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_pid, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_PID", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_STRING; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_string, - orte_dss_unpack_string, - (orte_dss_copy_fn_t)orte_dss_copy_string, - (orte_dss_compare_fn_t)orte_dss_compare_string, - (orte_dss_size_fn_t)orte_dss_size_string, - (orte_dss_print_fn_t)orte_dss_print_string, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_STRUCTURED, - 
"ORTE_STRING", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_STD_CNTR; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_std_cntr, - orte_dss_unpack_std_cntr, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_std_cntr, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_std_cntr, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_STD_CNTR", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_DATA_TYPE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_data_type, - orte_dss_unpack_data_type, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_dt, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_data_type, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_DATA_TYPE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - tmp = ORTE_DATA_VALUE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_data_value, - orte_dss_unpack_data_value, - (orte_dss_copy_fn_t)orte_dss_copy_data_value, - (orte_dss_compare_fn_t)orte_dss_compare_data_value, - (orte_dss_size_fn_t)orte_dss_size_data_value, - (orte_dss_print_fn_t)orte_dss_print_data_value, - (orte_dss_release_fn_t)orte_dss_std_obj_release, - ORTE_DSS_STRUCTURED, - "ORTE_DATA_VALUE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - -#if OPAL_ENABLE_FT == 1 - tmp = ORTE_CKPT_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_ckpt_cmd, - orte_dss_unpack_ckpt_cmd, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_ckpt_cmd, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_ckpt_cmd, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_DATA_TYPE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } -#endif - - tmp = ORTE_BYTE_OBJECT; - if (ORTE_SUCCESS != (rc = 
orte_dss.register_type(orte_dss_pack_byte_object, - orte_dss_unpack_byte_object, - (orte_dss_copy_fn_t)orte_dss_copy_byte_object, - (orte_dss_compare_fn_t)orte_dss_compare_byte_object, - (orte_dss_size_fn_t)orte_dss_size_byte_object, - (orte_dss_print_fn_t)orte_dss_print_byte_object, - (orte_dss_release_fn_t)orte_dss_release_byte_object, - ORTE_DSS_STRUCTURED, - "ORTE_BYTE_OBJECT", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* All done */ - - return ORTE_SUCCESS; -} - - -int orte_dss_close(void) -{ - orte_std_cntr_t i; - - orte_dss_initialized = false; - - for (i = 0 ; i < orte_pointer_array_get_size(orte_dss_types) ; ++i) { - orte_dss_type_info_t *info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, i); - if (NULL != info) { - OBJ_RELEASE(info); - } - } - - OBJ_RELEASE(orte_dss_types); - - return ORTE_SUCCESS; -} diff --git a/orte/dss/dss_pack.c b/orte/dss/dss_pack.c deleted file mode 100644 index bec18b7067..0000000000 --- a/orte/dss/dss_pack.c +++ /dev/null @@ -1,547 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "opal/types.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" - -int orte_dss_pack(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - orte_data_type_t type) -{ - int rc; - - /* check for error */ - if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* Pack the number of values */ - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - if (ORTE_SUCCESS != (rc = orte_dss_store_data_type(buffer, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - if (ORTE_SUCCESS != (rc = orte_dss_pack_std_cntr(buffer, &num_vals, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* Pack the value(s) */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, src, num_vals, type))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -int orte_dss_pack_buffer(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - orte_data_type_t type) -{ - int rc; - orte_dss_type_info_t *info; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_buffer( %p, %p, %lu, %d )\n", - (void*)buffer, src, (long unsigned int)num_vals, (int)type ) ); - - /* Pack the declared data type */ - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - if (ORTE_SUCCESS != (rc = orte_dss_store_data_type(buffer, type))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* Lookup the pack function for this type and call it */ - - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; - } - - if (ORTE_SUCCESS != (rc = info->odti_pack_fn(buffer, src, num_vals, type))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - - -/* PACK FUNCTIONS FOR GENERIC SYSTEM TYPES */ - -/* - * BOOL - */ -int 
orte_dss_pack_bool(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* System types need to always be described so we can properly - unpack them. If we aren't fully described, then add the - description for this type... */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, DSS_TYPE_BOOL))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_BOOL))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * INT - */ -int orte_dss_pack_int(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* System types need to always be described so we can properly - unpack them. If we aren't fully described, then add the - description for this type... */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, DSS_TYPE_INT))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_INT))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * SIZE_T - */ -int orte_dss_pack_sizet(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* System types need to always be described so we can properly - unpack them. If we aren't fully described, then add the - description for this type... 
*/ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, DSS_TYPE_SIZE_T))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_SIZE_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * PID_T - */ -int orte_dss_pack_pid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* System types need to always be described so we can properly - unpack them. If we aren't fully described, then add the - description for this type... */ - if (ORTE_DSS_BUFFER_FULLY_DESC != buffer->type) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, DSS_TYPE_PID_T))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, DSS_TYPE_PID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - - -/* PACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ - -/* - * NULL - */ -int orte_dss_pack_null(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - char null=0x00; - char *dst; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_null * %d\n", num_vals ) ); - /* check to see if buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, num_vals))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* store the nulls */ - memset(dst, (int)null, num_vals); - - /* update buffer pointers */ - buffer->pack_ptr += num_vals; - buffer->bytes_used += num_vals; - - return ORTE_SUCCESS; -} - -/* - * BYTE, CHAR, INT8 - */ -int orte_dss_pack_byte(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - char *dst; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_byte * %d\n", num_vals ) ); - /* check to see if 
buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, num_vals))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* store the data */ - memcpy(dst, src, num_vals); - - /* update buffer pointers */ - buffer->pack_ptr += num_vals; - buffer->bytes_used += num_vals; - - return ORTE_SUCCESS; -} - -/* - * INT16 - */ -int orte_dss_pack_int16(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint16_t tmp, *srctmp = (uint16_t*) src; - char *dst; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_int16 * %d\n", num_vals ) ); - /* check to see if buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, num_vals*sizeof(tmp)))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < num_vals; ++i) { - tmp = htons(srctmp[i]); - memcpy(dst, &tmp, sizeof(tmp)); - dst += sizeof(tmp); - } - buffer->pack_ptr += num_vals * sizeof(tmp); - buffer->bytes_used += num_vals * sizeof(tmp); - - return ORTE_SUCCESS; -} - -/* - * INT32 - */ -int orte_dss_pack_int32(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint32_t tmp, *srctmp = (uint32_t*) src; - char *dst; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_int32 * %d\n", num_vals ) ); - /* check to see if buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, num_vals*sizeof(tmp)))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < num_vals; ++i) { - tmp = htonl(srctmp[i]); - memcpy(dst, &tmp, sizeof(tmp)); - dst += sizeof(tmp); - } - buffer->pack_ptr += num_vals * sizeof(tmp); - buffer->bytes_used += num_vals * sizeof(tmp); - - return ORTE_SUCCESS; -} - -/* - * INT64 - */ -int orte_dss_pack_int64(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ 
- orte_std_cntr_t i; - uint64_t tmp, *srctmp = (uint64_t*) src; - char *dst; - size_t bytes_packed = num_vals * sizeof(tmp); - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_pack_int64 * %d\n", num_vals ) ); - /* check to see if buffer needs extending */ - if (NULL == (dst = orte_dss_buffer_extend(buffer, bytes_packed))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < num_vals; ++i) { - tmp = hton64(srctmp[i]); - memcpy(dst, &tmp, sizeof(tmp)); - dst += sizeof(tmp); - } - buffer->pack_ptr += bytes_packed; - buffer->bytes_used += bytes_packed; - - return ORTE_SUCCESS; -} - -/* - * STRING - */ -int orte_dss_pack_string(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret = ORTE_SUCCESS; - orte_std_cntr_t i, len; - char **ssrc = (char**) src; - - for (i = 0; i < num_vals; ++i) { - if (NULL == ssrc[i]) { /* got zero-length string/NULL pointer - store NULL */ - len = 0; - if (ORTE_SUCCESS != (ret = orte_dss_pack_std_cntr(buffer, &len, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - len = (orte_std_cntr_t)strlen(ssrc[i]) + 1; - if (ORTE_SUCCESS != (ret = orte_dss_pack_std_cntr(buffer, &len, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - return ret; - } - if (ORTE_SUCCESS != (ret = - orte_dss_pack_byte(buffer, ssrc[i], len, ORTE_BYTE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - } - - return ORTE_SUCCESS; -} - -/* PACK FUNCTIONS FOR GENERIC ORTE TYPES */ - -/* - * ORTE_STD_CNTR - */ -int orte_dss_pack_std_cntr(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_STD_CNTR_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * ORTE_DATA_TYPE - */ -int orte_dss_pack_data_type(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - 
orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_DATA_TYPE_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * ORTE_DATA_VALUE - */ -int orte_dss_pack_data_value(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num, orte_data_type_t type) -{ - orte_dss_type_info_t *info; - orte_data_value_t **sdv; - orte_std_cntr_t i; - int ret; - - sdv = (orte_data_value_t **) src; - - for (i = 0; i < num; ++i) { - /* if the src data value is NULL, then we will pack it as ORTE_NULL to indicate - * that the unpack should leave a NULL data value - */ - if (NULL == sdv[i]) { - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, ORTE_NULL))) { - ORTE_ERROR_LOG(ret); - return ret; - } - continue; - } - - /* pack the data type - we'll need it on the other end */ - if (ORTE_SUCCESS != (ret = orte_dss_store_data_type(buffer, sdv[i]->type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* if the data type is UNDEF, then nothing more to do */ - if (ORTE_UNDEF == sdv[i]->type) continue; - - /* Lookup the pack function for this type and call it */ - - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, sdv[i]->type))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; - } - - if (info->odti_structured) { - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, &(sdv[i]->data), 1, sdv[i]->type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, sdv[i]->data, 1, sdv[i]->type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - } - - return ORTE_SUCCESS; -} - -#if OPAL_ENABLE_FT == 1 -/* - * ORTE_CKPT_CMD - */ -int orte_dss_pack_ckpt_cmd(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num, - orte_data_type_t type) -{ - size_t required; - int rc; - - required = sizeof(size_t); - switch (required) { - - case 1: - if (ORTE_SUCCESS != 
( - rc = orte_dss_pack_byte(buffer, src, num, ORTE_BYTE))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 2: - if (ORTE_SUCCESS != ( - rc = orte_dss_pack_int16(buffer, src, num, ORTE_INT16))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 4: - if (ORTE_SUCCESS != ( - rc = orte_dss_pack_int32(buffer, src, num, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 8: - if (ORTE_SUCCESS != ( - rc = orte_dss_pack_int64(buffer, src, num, ORTE_INT64))) { - ORTE_ERROR_LOG(rc); - } - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - return rc; -} -#endif - -/* - * ORTE_BYTE_OBJECT - */ -int orte_dss_pack_byte_object(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num, - orte_data_type_t type) -{ - orte_byte_object_t **sbyteptr; - orte_std_cntr_t i, n; - int ret; - - sbyteptr = (orte_byte_object_t **) src; - - for (i = 0; i < num; ++i) { - n = sbyteptr[i]->size; - if (ORTE_SUCCESS != (ret = orte_dss_pack_std_cntr(buffer, &n, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - return ret; - } - if (0 < n) { - if (ORTE_SUCCESS != (ret = - orte_dss_pack_byte(buffer, sbyteptr[i]->bytes, n, ORTE_BYTE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/dss/dss_print.c b/orte/dss/dss_print.c deleted file mode 100644 index 5ce88a6dc3..0000000000 --- a/orte/dss/dss_print.c +++ /dev/null @@ -1,522 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" - -int orte_dss_print(char **output, char *prefix, void *src, orte_data_type_t type) -{ - int rc; - orte_dss_type_info_t *info; - - /* check for error */ - if (NULL == output) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* Lookup the print function for this type and call it */ - - if (!(type < orte_dss_types->size) || - (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type)))) { - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - if (ORTE_SUCCESS != (rc = info->odti_print_fn(output, prefix, src, type))) { - ORTE_ERROR_LOG(rc); - } - - - return rc; -} - -/* - * STANDARD PRINT FUNCTIONS FOR SYSTEM TYPES - */ -int orte_dss_print_byte(char **output, char *prefix, uint8_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_BYTE\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_BYTE\tValue: %x", prefix, *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_string(char **output, char *prefix, char *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_STRING\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_STRING\tValue: %s", prefx, src); - - return ORTE_SUCCESS; -} - -int 
orte_dss_print_size(char **output, char *prefix, size_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_SIZE\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_SIZE\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_pid(char **output, char *prefix, pid_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_PID\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_PID\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_bool(char **output, char *prefix, bool *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_BOOL\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_BOOL\tValue: %s", prefx, *src ? 
"TRUE" : "FALSE"); - - return ORTE_SUCCESS; -} - -int orte_dss_print_int(char **output, char *prefix, int *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_INT\tValue: %ld", prefx, (long) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_uint(char **output, char *prefix, int *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_UINT\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_uint8(char **output, char *prefix, uint8_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT8\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_UINT8\tValue: %u", prefx, (unsigned int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_uint16(char **output, char *prefix, uint16_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT16\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: 
ORTE_UINT16\tValue: %u", prefx, (unsigned int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_uint32(char **output, char *prefix, uint32_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT32\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_UINT32\tValue: %u", prefx, (unsigned int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_int8(char **output, char *prefix, int8_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT8\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_INT8\tValue: %d", prefx, (int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_int16(char **output, char *prefix, int16_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT16\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_INT16\tValue: %d", prefx, (int) *src); - - return ORTE_SUCCESS; -} - -int orte_dss_print_int32(char **output, char *prefix, int32_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT32\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - 
asprintf(output, "%sData type: ORTE_INT32\tValue: %d", prefx, (int) *src); - - return ORTE_SUCCESS; -} -int orte_dss_print_uint64(char **output, char *prefix, -#ifdef HAVE_INT64_T - uint64_t *src, -#else - void *src, -#endif /* HAVE_INT64_T */ - orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_UINT64\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - -#ifdef HAVE_INT64_T - asprintf(output, "%sData type: ORTE_UINT64\tValue: %lu", prefx, (unsigned long) *src); -#else - asprintf(output, "%sData type: ORTE_UINT64\tValue: unsupported", prefx); -#endif /* HAVE_INT64_T */ - - return ORTE_SUCCESS; -} - -int orte_dss_print_int64(char **output, char *prefix, -#ifdef HAVE_INT64_T - int64_t *src, -#else - void *src, -#endif /* HAVE_INT64_T */ - orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_INT64\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - -#ifdef HAVE_INT64_T - asprintf(output, "%sData type: ORTE_INT64\tValue: %ld", prefx, (long) *src); -#else - asprintf(output, "%sData type: ORTE_INT64\tValue: unsupported", prefx); -#endif /* HAVE_INT64_T */ - - return ORTE_SUCCESS; -} - -int orte_dss_print_null(char **output, char *prefix, void *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_NULL\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_NULL", prefx); - - return ORTE_SUCCESS; -} - - -/* 
PRINT FUNCTIONS FOR GENERIC ORTE TYPES */ -/* - * ORTE_STD_CNTR - */ -int orte_dss_print_std_cntr(char **output, char *prefix, orte_std_cntr_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_STD_CNTR\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_STD_CNTR\tValue: %lu", prefx, (unsigned long) *src); - return ORTE_SUCCESS; -} - -/* - * ORTE_DATA_TYPE - */ -int orte_dss_print_data_type(char **output, char *prefix, orte_data_type_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_DATA_TYPE\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_DATA_TYPE\tValue: %lu", prefx, (unsigned long) *src); - return ORTE_SUCCESS; -} - -/* - * ORTE_DATA_VALUE - */ -int orte_dss_print_data_value(char **output, char *prefix, orte_data_value_t *src, orte_data_type_t type) -{ - char *pfx, *tmp1, *tmp2; - int rc; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - if (NULL != prefix) { - asprintf(output, "%sData type: ORTE_DATA_VALUE\tValue: NULL pointer", prefix); - } else { - asprintf(output, "Data type: ORTE_DATA_VALUE\tValue: NULL pointer"); - } - return ORTE_SUCCESS; - } - - if (NULL != prefix) { - asprintf(&pfx, "%s\t", prefix); - asprintf(&tmp1, "%sData type: ORTE_DATA_VALUE:\n", prefix); - } else { - asprintf(&tmp1, "Data type: ORTE_DATA_VALUE:\n"); - asprintf(&pfx, "\t"); - } - - /* if data is included, print it */ - if (ORTE_UNDEF == src->type) { /* undefined data type - just report it */ - asprintf(&tmp2, "%sData type: 
ORTE_UNDEF\tValue: N/A", pfx); - } else if (NULL != src->data) { - if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->data, src->type))) { - ORTE_ERROR_LOG(rc); - if (NULL != tmp1) free(tmp1); - if (NULL != pfx) free(pfx); - *output = NULL; - return rc; - } - } else { /* indicate the data field was NULL */ - asprintf(&tmp2, "%sData field is NULL", pfx); - } - - asprintf(output, "%s%s", tmp1, tmp2); - free(tmp1); - free(tmp2); - if (NULL != pfx) free(pfx); - - return ORTE_SUCCESS; -} - -#if OPAL_ENABLE_FT == 1 -/* - * ORTE_CKPT_CMD - */ -int orte_dss_print_ckpt_cmd(char **output, char *prefix, size_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_CKPT_CMD\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_CKPT_CMD\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} -#endif - -/* - * ORTE_BYTE_OBJECT - */ -int orte_dss_print_byte_object(char **output, char *prefix, orte_byte_object_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_BYTE_OBJECT\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_BYTE_OBJECT\tSize: %lu", prefx, (unsigned long) src->size); - - return ORTE_SUCCESS; -} diff --git a/orte/dss/dss_register.c b/orte/dss/dss_register.c deleted file mode 100644 index 2d7ced194d..0000000000 --- a/orte/dss/dss_register.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" - -#include "orte/dss/dss.h" -#include "orte/dss/dss_internal.h" - - -int orte_dss_register(orte_dss_pack_fn_t pack_fn, - orte_dss_unpack_fn_t unpack_fn, - orte_dss_copy_fn_t copy_fn, - orte_dss_compare_fn_t compare_fn, - orte_dss_size_fn_t size_fn, - orte_dss_print_fn_t print_fn, - orte_dss_release_fn_t release_fn, - bool structured, - const char *name, orte_data_type_t *type) -{ - int ret; - orte_dss_type_info_t *info, **ptr; - orte_std_cntr_t i; - orte_data_type_t j; - - /* Check for bozo cases */ - - if (NULL == pack_fn || NULL == unpack_fn || NULL == copy_fn || NULL == compare_fn || - NULL == size_fn || NULL == print_fn || NULL == name || NULL == type) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* check if this entry already exists - if so, error - we do NOT allow multiple type registrations */ - ptr = (orte_dss_type_info_t**)(orte_dss_types->addr); - for (i=0, j=0; j < orte_dss_num_reg_types && - i < orte_dss_types->size; i++) { - if (NULL != ptr[i]) { - j++; - /* check if the name exists */ - if (0 == strcmp(ptr[i]->odti_name, name)) { - ORTE_ERROR_LOG(ORTE_ERR_DATA_TYPE_REDEF); - return ORTE_ERR_DATA_TYPE_REDEF; - } - /* check if the specified type exists */ - if (*type > 0 && ptr[i]->odti_type == *type) { - ORTE_ERROR_LOG(ORTE_ERR_DATA_TYPE_REDEF); - return ORTE_ERR_DATA_TYPE_REDEF; - } - } - } - - /* if type is given (i.e., *type > 0), then just use it. 
- * otherwise, go and get a new type id from the name - * service - */ - if (0 >= *type) { - if (ORTE_SUCCESS != (ret = orte_ns.define_data_type(name, type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* Add a new entry to the table */ - info = (orte_dss_type_info_t*) OBJ_NEW(orte_dss_type_info_t); - if (NULL == info) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - info->odti_type = *type; - info->odti_name = strdup(name); - info->odti_pack_fn = pack_fn; - info->odti_unpack_fn = unpack_fn; - info->odti_copy_fn = copy_fn; - info->odti_compare_fn = compare_fn; - info->odti_size_fn = size_fn; - info->odti_print_fn = print_fn; - info->odti_release_fn = release_fn; - info->odti_structured = structured; - if (ORTE_SUCCESS != (ret = orte_pointer_array_set_item(orte_dss_types, *type, info))) { - ORTE_ERROR_LOG(ret); - } - - /* All done */ - - return ret; -} diff --git a/orte/dss/dss_types.h b/orte/dss/dss_types.h deleted file mode 100644 index 85404befae..0000000000 --- a/orte/dss/dss_types.h +++ /dev/null @@ -1,109 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Buffer management types. 
- */ - -#ifndef ORTE_DSS_TYPES_H_ -#define ORTE_DSS_TYPES_H_ - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "opal/class/opal_object.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* define arithmetic operations for readability */ -typedef uint8_t orte_dss_arith_op_t; - -#define ORTE_DSS_ADD 1 -#define ORTE_DSS_SUB 2 -#define ORTE_DSS_MUL 3 -#define ORTE_DSS_DIV 4 - - -/* Data value object */ -typedef struct { - opal_object_t super; /* required for this to be an object */ - orte_data_type_t type; /* the type of value stored */ - void *data; -} orte_data_value_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_data_value_t); - -#define ORTE_DATA_VALUE_EMPTY { OPAL_OBJ_STATIC_INIT(orte_data_value_t), ORTE_UNDEF, NULL} - -/* structured-unstructured data flags */ -#define ORTE_DSS_STRUCTURED true -#define ORTE_DSS_UNSTRUCTURED false - -/** - * buffer type - */ -typedef uint8_t orte_dss_buffer_type_t; -#define ORTE_DSS_BUFFER_NON_DESC 0x00 -#define ORTE_DSS_BUFFER_FULLY_DESC 0x01 - -#define ORTE_DSS_BUFFER_TYPE_HTON(h); -#define ORTE_DSS_BUFFER_TYPE_NTOH(h); - -/** - * Structure for holding a buffer to be used with the RML or OOB - * subsystems. - */ - struct orte_buffer_t { - /** First member must be the object's parent */ - opal_object_t parent; - /** type of buffer */ - orte_dss_buffer_type_t type; - /** Start of my memory */ - char *base_ptr; - /** Where the next data will be packed to (within the allocated - memory starting at base_ptr) */ - char *pack_ptr; - /** Where the next data will be unpacked from (within the - allocated memory starting as base_ptr) */ - char *unpack_ptr; - - /** Number of bytes allocated (starting at base_ptr), - typically in multiples of orte_dps_pages, but may not be - if the buffer was initialized with orte_dps_load(). 
*/ - size_t bytes_allocated; - /** Number of bytes used by the buffer (i.e., amount of data -- - including overhead -- packed in the buffer) */ - size_t bytes_used; - }; - /** - * Convenience typedef - */ - typedef struct orte_buffer_t orte_buffer_t; - - /** formalize the declaration */ - ORTE_DECLSPEC OBJ_CLASS_DECLARATION (orte_buffer_t); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* ORTE_DSS_TYPES_H */ diff --git a/orte/dss/dss_unpack.c b/orte/dss/dss_unpack.c deleted file mode 100644 index 04d7f38d00..0000000000 --- a/orte/dss/dss_unpack.c +++ /dev/null @@ -1,617 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include -#ifdef HAVE_NETINET_IN_H -#include -#endif - -#include "opal/types.h" -#include "opal/util/output.h" -#include "opal/mca/backtrace/backtrace.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/dss/dss_internal.h" - -int orte_dss_unpack(orte_buffer_t *buffer, void *dst, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int rc, ret; - orte_std_cntr_t local_num, n=1; - orte_data_type_t local_type; - - /* check for error */ - if (NULL == buffer || NULL == dst || NULL == num_vals) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* if user provides a zero for num_vals, then there is no storage allocated - * so return an appropriate error - */ - if (0 == *num_vals) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_INADEQUATE_SPACE); - return ORTE_ERR_UNPACK_INADEQUATE_SPACE; - } - - /** Unpack the declared number of values - * REMINDER: it is possible that the buffer is corrupted and that - * the DSS will *think* there is a proper orte_std_cntr_t variable at the - * beginning of the unpack region - but that the value is bogus (e.g., just - * a byte field in a string array that so happens to have a value that - * matches the orte_std_cntr_t data type flag). Therefore, this error check is - * NOT completely safe. This is true for ALL unpack functions, not just - * orte_std_cntr_t as used here. 
- */ - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - if (ORTE_SUCCESS != ( - rc = orte_dss_get_data_type(buffer, &local_type))) { - *num_vals = 0; - return rc; - } - if (ORTE_STD_CNTR != local_type) { /* if the length wasn't first, then error */ - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE); - *num_vals = 0; - return ORTE_ERR_UNPACK_FAILURE; - } - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss_unpack_std_cntr(buffer, &local_num, &n, ORTE_STD_CNTR))) { - *num_vals = 0; - return rc; - } - - /** if the storage provided is inadequate, set things up - * to unpack as much as we can and to return an error code - * indicating that everything was not unpacked - the buffer - * is left in a state where it can not be further unpacked. - */ - if (local_num > *num_vals) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_INADEQUATE_SPACE); - local_num = *num_vals; - ret = ORTE_ERR_UNPACK_INADEQUATE_SPACE; - } else { /** enough or more than enough storage */ - *num_vals = local_num; /** let the user know how many we actually unpacked */ - ret = ORTE_SUCCESS; - } - - /** Unpack the value(s) */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, dst, &local_num, type))) { - *num_vals = 0; - ret = rc; - } - - return ret; -} - -int orte_dss_unpack_buffer(orte_buffer_t *buffer, void *dst, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int rc; - orte_data_type_t local_type; - orte_dss_type_info_t *info; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_buffer( %p, %p, %lu, %d )\n", - (void*)buffer, dst, (long unsigned int)*num_vals, (int)type ) ); - - /** Unpack the declared data type */ - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - if (ORTE_SUCCESS != (rc = orte_dss_get_data_type(buffer, &local_type))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* if the data types don't match, then return an error */ - if (type != local_type) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_MISMATCH); - return ORTE_ERR_PACK_MISMATCH; - } - } - - /* Lookup the unpack function for this type and call it 
*/ - - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, type))) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE); - return ORTE_ERR_UNPACK_FAILURE; - } - - rc = info->odti_unpack_fn(buffer, dst, num_vals, type); - return rc; -} - - -/* UNPACK GENERIC SYSTEM TYPES */ - -/* - * BOOL - */ -int orte_dss_unpack_bool(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_data_type_t remote_type; - - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - /* see what type was actually packed */ - if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - if (remote_type == DSS_TYPE_BOOL) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_BOOL))) { - ORTE_ERROR_LOG(ret); - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(bool, remote_type, ret); - } - return ret; -} - -/* - * INT - */ -int orte_dss_unpack_int(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_data_type_t remote_type; - - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - /* see what type was actually packed */ - if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - if (remote_type == DSS_TYPE_INT) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_INT))) { - ORTE_ERROR_LOG(ret); - } - } else { - /* slow 
path - types are different sizes */ - UNPACK_SIZE_MISMATCH(int, remote_type, ret); - } - - return ret; -} - -/* - * SIZE_T - */ -int orte_dss_unpack_sizet(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_data_type_t remote_type; - - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - /* see what type was actually packed */ - if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - if (remote_type == DSS_TYPE_SIZE_T) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_SIZE_T))) { - ORTE_ERROR_LOG(ret); - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(size_t, remote_type, ret); - } - - return ret; -} - -/* - * PID_T - */ -int orte_dss_unpack_pid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_data_type_t remote_type; - - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - /* see what type was actually packed */ - if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - if (remote_type == DSS_TYPE_PID_T) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, DSS_TYPE_PID_T))) { - ORTE_ERROR_LOG(ret); - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(pid_t, remote_type, ret); - } - - return ret; -} - - -/* UNPACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ - -/* - * 
NULL - */ -int orte_dss_unpack_null(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_null * %d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, *num_vals)) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - memcpy(dest, buffer->unpack_ptr, *num_vals); - - /* update buffer pointer */ - buffer->unpack_ptr += *num_vals; - - return ORTE_SUCCESS; -} - -/* - * BYTE, CHAR, INT8 - */ -int orte_dss_unpack_byte(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_byte * %d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, *num_vals)) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - memcpy(dest, buffer->unpack_ptr, *num_vals); - - /* update buffer pointer */ - buffer->unpack_ptr += *num_vals; - - return ORTE_SUCCESS; -} - -int orte_dss_unpack_int16(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint16_t tmp, *desttmp = (uint16_t*) dest; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_int16 * %d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); - desttmp[i] = ntohs(tmp); - buffer->unpack_ptr += sizeof(tmp); - } - - return ORTE_SUCCESS; -} - -int orte_dss_unpack_int32(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint32_t tmp, *desttmp = (uint32_t*) dest; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_int32 * 
%d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); - desttmp[i] = ntohl(tmp); - buffer->unpack_ptr += sizeof(tmp); - } - - return ORTE_SUCCESS; -} - -int orte_dss_unpack_int64(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - orte_std_cntr_t i; - uint64_t tmp, *desttmp = (uint64_t*) dest; - - OPAL_OUTPUT( ( orte_dss_verbose, "orte_dss_unpack_int64 * %d\n", (int)*num_vals ) ); - /* check to see if there's enough data in buffer */ - if (orte_dss_too_small(buffer, (*num_vals)*sizeof(tmp))) { - return ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); - desttmp[i] = ntoh64(tmp); - buffer->unpack_ptr += sizeof(tmp); - } - - return ORTE_SUCCESS; -} - -int orte_dss_unpack_string(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - orte_std_cntr_t i, len, n=1; - char **sdest = (char**) dest; - - for (i = 0; i < (*num_vals); ++i) { - if (ORTE_SUCCESS != (ret = orte_dss_unpack_std_cntr(buffer, &len, &n, ORTE_STD_CNTR))) { - return ret; - } - if (0 == len) { /* zero-length string - unpack the NULL */ - sdest[i] = NULL; - } else { - sdest[i] = (char*)malloc(len); - if (NULL == sdest[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (ret = orte_dss_unpack_byte(buffer, sdest[i], &len, ORTE_BYTE))) { - return ret; - } - } - } - - return ORTE_SUCCESS; -} - - -/* UNPACK FUNCTIONS FOR GENERIC ORTE TYPES */ - -/* - * ORTE_STD_CNTR - */ -int orte_dss_unpack_std_cntr(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int ret; - - 
/* turn around and unpack the real type */ - ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_STD_CNTR_T); - - return ret; -} - -/* - * ORTE_DATA_TYPE - */ -int orte_dss_unpack_data_type(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int ret; - - /* turn around and unpack the real type */ - ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_DATA_TYPE_T); - - return ret; -} - -#if OPAL_ENABLE_FT == 1 -/* - * ORTE_CKPT_CMD - */ -int orte_dss_unpack_ckpt_cmd(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num, - orte_data_type_t type) -{ - size_t required; - int rc; - - required = sizeof(size_t); - switch (required) { - - case 1: - if (ORTE_SUCCESS != ( - rc = orte_dss_unpack_byte(buffer, dest, num, ORTE_BYTE))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 2: - if (ORTE_SUCCESS != ( - rc = orte_dss_unpack_int16(buffer, dest, num, ORTE_INT16))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 4: - if (ORTE_SUCCESS != ( - rc = orte_dss_unpack_int32(buffer, dest, num, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - } - break; - - case 8: - if (ORTE_SUCCESS != ( - rc = orte_dss_unpack_int64(buffer, dest, num, ORTE_INT64))) { - ORTE_ERROR_LOG(rc); - } - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - return rc; -} -#endif - -/* - * ORTE_DATA_VALUE - */ -int orte_dss_unpack_data_value(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num, - orte_data_type_t type) -{ - orte_dss_type_info_t *info; - orte_data_value_t **ddv; - orte_std_cntr_t i, n; - orte_data_type_t dt; - size_t nsize; - int ret; - - ddv = (orte_data_value_t **) dest; - - for (i = 0; i < *num; ++i) { - /* see what the data type is */ - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss_get_data_type(buffer, &dt))) { - return ret; - } - - /* if it is ORTE_NULL, then do nothing */ - if (ORTE_NULL == dt) continue; - - /* otherwise, allocate the new object and set the type */ - - ddv[i] = OBJ_NEW(orte_data_value_t); 
- if (NULL == ddv[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - ddv[i]->type = dt; - - /* if it is UNDEF, then nothing more to do */ - if (ORTE_UNDEF == ddv[i]->type) continue; - - /* get enough memory to hold it */ - if (ORTE_SUCCESS != (ret = orte_dss.size(&nsize, NULL, ddv[i]->type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - ddv[i]->data = (void*)malloc(nsize); - if (NULL == ddv[i]->data) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* Lookup the unpack function for this type and call it */ - - if (NULL == (info = (orte_dss_type_info_t*)orte_pointer_array_get_item(orte_dss_types, ddv[i]->type))) { - ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE); - return ORTE_ERR_PACK_FAILURE; - } - - if (info->odti_structured) { - n=1; - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, &(ddv[i]->data), &n, ddv[i]->type))) { - return ret; - } - } else { - n=1; - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, ddv[i]->data, &n, ddv[i]->type))) { - return ret; - } - } - } - - return ORTE_SUCCESS; -} - - -/* - * ORTE_BYTE_OBJECT - */ -int orte_dss_unpack_byte_object(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num, - orte_data_type_t type) -{ - int ret; - orte_std_cntr_t i, n, m=1; - orte_byte_object_t **dbyteptr; - - dbyteptr = (orte_byte_object_t**)dest; - n = *num; - for(i=0; isize), &m, ORTE_STD_CNTR))) { - return ret; - } - if (0 < dbyteptr[i]->size) { - dbyteptr[i]->bytes = (uint8_t*)malloc(dbyteptr[i]->size); - if (NULL == dbyteptr[i]->bytes) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (ret = orte_dss_unpack_byte(buffer, (dbyteptr[i]->bytes), - &(dbyteptr[i]->size), ORTE_BYTE))) { - return ret; - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/include/orte/Makefile.am b/orte/include/orte/Makefile.am index dd7599d6bc..41db25cb15 100644 --- a/orte/include/orte/Makefile.am +++ 
b/orte/include/orte/Makefile.am @@ -20,8 +20,8 @@ # orte/include//Makefile.am headers += \ - orte/orte_constants.h \ - orte/orte_types.h + orte/constants.h \ + orte/types.h nodist_headers += \ orte/version.h diff --git a/orte/include/orte/orte_constants.h b/orte/include/orte/constants.h similarity index 59% rename from orte/include/orte/orte_constants.h rename to orte/include/orte/constants.h index 0059e451d1..eb072fced3 100644 --- a/orte/include/orte/orte_constants.h +++ b/orte/include/orte/constants.h @@ -22,16 +22,10 @@ #include "opal/constants.h" #include "orte_config.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS #define ORTE_ERR_BASE OPAL_ERR_MAX -/* define the results values for comparisons so we can change them in only one place */ -#define ORTE_VALUE1_GREATER +1 -#define ORTE_VALUE2_GREATER -1 -#define ORTE_EQUAL 0 enum { /* Error codes inherited from OPAL. Still enum values so that we @@ -60,7 +54,17 @@ enum { ORTE_ERR_FILE_READ_FAILURE = OPAL_ERR_FILE_READ_FAILURE, ORTE_ERR_FILE_WRITE_FAILURE = OPAL_ERR_FILE_WRITE_FAILURE, ORTE_ERR_FILE_OPEN_FAILURE = OPAL_ERR_FILE_OPEN_FAILURE, - + ORTE_ERR_PACK_MISMATCH = OPAL_ERR_PACK_MISMATCH, + ORTE_ERR_PACK_FAILURE = OPAL_ERR_PACK_FAILURE, + ORTE_ERR_UNPACK_FAILURE = OPAL_ERR_UNPACK_FAILURE, + ORTE_ERR_UNPACK_INADEQUATE_SPACE = OPAL_ERR_UNPACK_INADEQUATE_SPACE, + ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER = OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER, + ORTE_ERR_TYPE_MISMATCH = OPAL_ERR_TYPE_MISMATCH, + ORTE_ERR_OPERATION_UNSUPPORTED = OPAL_ERR_OPERATION_UNSUPPORTED, + ORTE_ERR_UNKNOWN_DATA_TYPE = OPAL_ERR_UNKNOWN_DATA_TYPE, + ORTE_ERR_BUFFER = OPAL_ERR_BUFFER, + ORTE_ERR_DATA_TYPE_REDEF = OPAL_ERR_DATA_TYPE_REDEF, + ORTE_ERR_DATA_OVERWRITE_ATTEMPT = OPAL_ERR_DATA_OVERWRITE_ATTEMPT, /* error codes specific to ORTE - don't forget to update orte/util/error_strings.c when adding new error codes!! 
Otherwise, the error reporting system will potentially crash, @@ -69,43 +73,32 @@ enum { ORTE_ERR_RECV_LESS_THAN_POSTED = (ORTE_ERR_BASE - 1), ORTE_ERR_RECV_MORE_THAN_POSTED = (ORTE_ERR_BASE - 2), ORTE_ERR_NO_MATCH_YET = (ORTE_ERR_BASE - 3), - ORTE_ERR_BUFFER = (ORTE_ERR_BASE - 4), - ORTE_ERR_REQUEST = (ORTE_ERR_BASE - 5), - ORTE_ERR_NO_CONNECTION_ALLOWED = (ORTE_ERR_BASE - 6), - ORTE_ERR_CONNECTION_REFUSED = (ORTE_ERR_BASE - 7), - ORTE_ERR_CONNECTION_FAILED = (ORTE_ERR_BASE - 8), - ORTE_ERR_PACK_MISMATCH = (ORTE_ERR_BASE - 9), - ORTE_ERR_PACK_FAILURE = (ORTE_ERR_BASE - 10), - ORTE_ERR_UNPACK_FAILURE = (ORTE_ERR_BASE - 11), - ORTE_ERR_COMM_FAILURE = (ORTE_ERR_BASE - 12), - ORTE_ERR_UNPACK_INADEQUATE_SPACE = (ORTE_ERR_BASE - 13), - ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER = (ORTE_ERR_BASE - 14), - ORTE_ERR_GPR_DATA_CORRUPT = (ORTE_ERR_BASE - 15), - ORTE_ERR_TYPE_MISMATCH = (ORTE_ERR_BASE - 16), - ORTE_ERR_COMPARE_FAILURE = (ORTE_ERR_BASE - 17), - ORTE_ERR_COPY_FAILURE = (ORTE_ERR_BASE - 18), - ORTE_ERR_UNKNOWN_DATA_TYPE = (ORTE_ERR_BASE - 19), - ORTE_ERR_DATA_TYPE_REDEF = (ORTE_ERR_BASE - 20), - ORTE_ERR_DATA_OVERWRITE_ATTEMPT = (ORTE_ERR_BASE - 21), - ORTE_ERR_OPERATION_UNSUPPORTED = (ORTE_ERR_BASE - 22), - ORTE_ERR_PROC_STATE_MISSING = (ORTE_ERR_BASE - 23), - ORTE_ERR_PROC_EXIT_STATUS_MISSING = (ORTE_ERR_BASE - 24), - ORTE_ERR_INDETERMINATE_STATE_INFO = (ORTE_ERR_BASE - 25), - ORTE_ERR_NODE_FULLY_USED = (ORTE_ERR_BASE - 26), - ORTE_ERR_INVALID_NUM_PROCS = (ORTE_ERR_BASE - 27), - ORTE_ERR_SILENT = (ORTE_ERR_BASE - 28), - ORTE_ERR_ADDRESSEE_UNKNOWN = (ORTE_ERR_BASE - 29), - ORTE_ERR_SYS_LIMITS_PIPES = (ORTE_ERR_BASE - 30), - ORTE_ERR_PIPE_SETUP_FAILURE = (ORTE_ERR_BASE - 31), - ORTE_ERR_SYS_LIMITS_CHILDREN = (ORTE_ERR_BASE - 32), - ORTE_ERR_FAILED_GET_TERM_ATTRS = (ORTE_ERR_BASE - 33), - ORTE_ERR_WDIR_NOT_FOUND = (ORTE_ERR_BASE - 34), - ORTE_ERR_EXE_NOT_FOUND = (ORTE_ERR_BASE - 35), - ORTE_ERR_PIPE_READ_FAILURE = (ORTE_ERR_BASE - 36), - ORTE_ERR_EXE_NOT_ACCESSIBLE = 
(ORTE_ERR_BASE - 37), - ORTE_ERR_FAILED_TO_START = (ORTE_ERR_BASE - 38), - ORTE_ERR_FILE_NOT_EXECUTABLE = (ORTE_ERR_BASE - 39), - ORTE_ERR_HNP_COULD_NOT_START = (ORTE_ERR_BASE - 40) + ORTE_ERR_REQUEST = (ORTE_ERR_BASE - 4), + ORTE_ERR_NO_CONNECTION_ALLOWED = (ORTE_ERR_BASE - 5), + ORTE_ERR_CONNECTION_REFUSED = (ORTE_ERR_BASE - 6), + ORTE_ERR_CONNECTION_FAILED = (ORTE_ERR_BASE - 7), + ORTE_ERR_COMM_FAILURE = (ORTE_ERR_BASE - 8), + ORTE_ERR_GPR_DATA_CORRUPT = (ORTE_ERR_BASE - 9), + ORTE_ERR_COMPARE_FAILURE = (ORTE_ERR_BASE - 10), + ORTE_ERR_COPY_FAILURE = (ORTE_ERR_BASE - 11), + ORTE_ERR_PROC_STATE_MISSING = (ORTE_ERR_BASE - 12), + ORTE_ERR_PROC_EXIT_STATUS_MISSING = (ORTE_ERR_BASE - 13), + ORTE_ERR_INDETERMINATE_STATE_INFO = (ORTE_ERR_BASE - 14), + ORTE_ERR_NODE_FULLY_USED = (ORTE_ERR_BASE - 15), + ORTE_ERR_INVALID_NUM_PROCS = (ORTE_ERR_BASE - 16), + ORTE_ERR_SILENT = (ORTE_ERR_BASE - 17), + ORTE_ERR_ADDRESSEE_UNKNOWN = (ORTE_ERR_BASE - 18), + ORTE_ERR_SYS_LIMITS_PIPES = (ORTE_ERR_BASE - 19), + ORTE_ERR_PIPE_SETUP_FAILURE = (ORTE_ERR_BASE - 20), + ORTE_ERR_SYS_LIMITS_CHILDREN = (ORTE_ERR_BASE - 21), + ORTE_ERR_FAILED_GET_TERM_ATTRS = (ORTE_ERR_BASE - 22), + ORTE_ERR_WDIR_NOT_FOUND = (ORTE_ERR_BASE - 23), + ORTE_ERR_EXE_NOT_FOUND = (ORTE_ERR_BASE - 24), + ORTE_ERR_PIPE_READ_FAILURE = (ORTE_ERR_BASE - 25), + ORTE_ERR_EXE_NOT_ACCESSIBLE = (ORTE_ERR_BASE - 26), + ORTE_ERR_FAILED_TO_START = (ORTE_ERR_BASE - 27), + ORTE_ERR_FILE_NOT_EXECUTABLE = (ORTE_ERR_BASE - 28), + ORTE_ERR_HNP_COULD_NOT_START = (ORTE_ERR_BASE - 29) }; #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) @@ -113,9 +106,7 @@ enum { /* include the prototype for the error-to-string converter */ ORTE_DECLSPEC const char* orte_err2str(int errnum); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif /* ORTE_CONSTANTS_H */ diff --git a/orte/include/orte/orte_types.h b/orte/include/orte/orte_types.h deleted file mode 100644 index 2fd97a906f..0000000000 --- a/orte/include/orte/orte_types.h 
+++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef ORTE_TYPES_H -#define ORTE_TYPES_H - -#include "orte_config.h" - -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -/** - * Supported datatypes for messaging and storage operations. - */ - -typedef uint8_t orte_data_type_t; /** data type indicators used in ORTE */ -#define ORTE_DATA_TYPE_T ORTE_UINT8 -#define ORTE_DSS_ID_MAX UINT8_MAX -#define ORTE_DSS_ID_INVALID ORTE_DSS_ID_MAX - -typedef int32_t orte_std_cntr_t; /** standard counters used in ORTE */ -#define ORTE_STD_CNTR_T ORTE_INT32 -#define ORTE_STD_CNTR_MAX INT32_MAX -#define ORTE_STD_CNTR_MIN INT32_MIN -#define ORTE_STD_CNTR_INVALID -1 - -/* define a structure to hold generic byte objects */ -typedef struct { - orte_std_cntr_t size; - uint8_t *bytes; -} orte_byte_object_t; - -/** - * handle differences in iovec - */ - -#if defined(__APPLE__) || defined(__WINDOWS__) -typedef char* orte_iov_base_ptr_t; -#else -typedef void* orte_iov_base_ptr_t; -#endif - - -#define ORTE_UNDEF (orte_data_type_t) 0 /**< type hasn't been defined yet */ -#define ORTE_BYTE (orte_data_type_t) 1 /**< a byte of data */ -#define ORTE_BOOL (orte_data_type_t) 2 /**< boolean */ -#define ORTE_STRING (orte_data_type_t) 3 /**< a NULL terminated string */ -#define ORTE_SIZE (orte_data_type_t) 4 /**< the generic size_t */ -#define ORTE_PID (orte_data_type_t) 5 /**< process pid */ - /* all the integer flavors 
*/ -#define ORTE_INT (orte_data_type_t) 6 /**< generic integer */ -#define ORTE_INT8 (orte_data_type_t) 7 /**< an 8-bit integer */ -#define ORTE_INT16 (orte_data_type_t) 8 /**< a 16-bit integer */ -#define ORTE_INT32 (orte_data_type_t) 9 /**< a 32-bit integer */ -#define ORTE_INT64 (orte_data_type_t) 10 /**< a 64-bit integer */ - /* all the unsigned integer flavors */ -#define ORTE_UINT (orte_data_type_t) 11 /**< generic unsigned integer */ -#define ORTE_UINT8 (orte_data_type_t) 12 /**< an 8-bit unsigned integer */ -#define ORTE_UINT16 (orte_data_type_t) 13 /**< a 16-bit unsigned integer */ -#define ORTE_UINT32 (orte_data_type_t) 14 /**< a 32-bit unsigned integer */ -#define ORTE_UINT64 (orte_data_type_t) 15 /**< a 64-bit unsigned integer */ - - /* we don't support floating point types */ - - /* orte-specific typedefs - grouped according to the subystem that handles - * their packing/unpacking */ - /* General types - packing/unpacking handled within DSS */ -#define ORTE_BYTE_OBJECT (orte_data_type_t) 16 /**< byte object structure */ -#define ORTE_DATA_TYPE (orte_data_type_t) 17 /**< data type */ -#define ORTE_NULL (orte_data_type_t) 18 /**< don't interpret data type */ -#define ORTE_DATA_VALUE (orte_data_type_t) 19 /**< data value */ -#define ORTE_ARITH_OP (orte_data_type_t) 20 /**< arithmetic operation flag */ -#define ORTE_STD_CNTR (orte_data_type_t) 21 /**< standard counter type */ - /* Name Service types */ -#define ORTE_NAME (orte_data_type_t) 22 /**< an orte_process_name_t */ -#define ORTE_VPID (orte_data_type_t) 23 /**< a vpid */ -#define ORTE_JOBID (orte_data_type_t) 24 /**< a jobid */ -#define ORTE_NODEID (orte_data_type_t) 25 /**< a node id */ - /* SMR types */ -#define ORTE_NODE_STATE (orte_data_type_t) 26 /**< node status flag */ -#define ORTE_PROC_STATE (orte_data_type_t) 27 /**< process/resource status */ -#define ORTE_JOB_STATE (orte_data_type_t) 28 /**< job status flag */ -#define ORTE_EXIT_CODE (orte_data_type_t) 29 /**< process exit code */ - /* 
GPR types */ -#define ORTE_GPR_KEYVAL (orte_data_type_t) 30 /**< registry key-value pair */ -#define ORTE_GPR_NOTIFY_ACTION (orte_data_type_t) 31 /**< registry notify action */ -#define ORTE_GPR_TRIGGER_ACTION (orte_data_type_t) 32 /**< registry trigger action */ -#define ORTE_GPR_CMD (orte_data_type_t) 33 /**< registry command */ -#define ORTE_GPR_SUBSCRIPTION_ID (orte_data_type_t) 34 /**< registry notify id tag */ -#define ORTE_GPR_TRIGGER_ID (orte_data_type_t) 35 /**< registry notify id tag */ -#define ORTE_GPR_VALUE (orte_data_type_t) 36 /**< registry return value */ -#define ORTE_GPR_ADDR_MODE (orte_data_type_t) 37 /**< Addressing mode for registry cmds */ -#define ORTE_GPR_SUBSCRIPTION (orte_data_type_t) 38 /**< describes data returned by subscription */ -#define ORTE_GPR_TRIGGER (orte_data_type_t) 39 /**< describes trigger conditions */ -#define ORTE_GPR_NOTIFY_DATA (orte_data_type_t) 40 /**< data returned from a subscription */ -#define ORTE_GPR_NOTIFY_MSG (orte_data_type_t) 41 /**< notify message containing notify_data objects */ -#define ORTE_GPR_NOTIFY_MSG_TYPE (orte_data_type_t) 42 /**< notify message type (subscription or trigger) */ -#define ORTE_GPR_SEARCH (orte_data_type_t) 43 /**< search criteria */ -#define ORTE_GPR_UPDATE (orte_data_type_t) 44 /**< update data on the registry */ -/* Resource Manager types */ -#define ORTE_APP_CONTEXT (orte_data_type_t) 45 /**< argv and enviro arrays */ -#define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 46 /**< application context mapping array */ -#define ORTE_NODE_DESC (orte_data_type_t) 47 /**< describes capabilities of nodes */ -#define ORTE_SLOT_DESC (orte_data_type_t) 48 /**< describes slot allocations/reservations */ -#define ORTE_RAS_NODE (orte_data_type_t) 49 /**< node information */ -#define ORTE_JOB_MAP (orte_data_type_t) 50 /**< map of process locations */ -#define ORTE_MAPPED_PROC (orte_data_type_t) 51 /**< process entry on map */ -#define ORTE_MAPPED_NODE (orte_data_type_t) 52 /**< node entry on map */ 
-#define ORTE_ATTRIBUTE (orte_data_type_t) 53 /**< attribute used to control framework behavior */ -#define ORTE_ATTR_LIST (orte_data_type_t) 54 /**< list of attributes */ -/* RML types */ -#define ORTE_RML_TAG (orte_data_type_t) 55 /**< tag for sending/receiving messages */ - -/* DAEMON communication type */ -#define ORTE_DAEMON_CMD (orte_data_type_t) 56 /**< command flag for communicating with the daemon */ - -/* Need a command separate from ORTE_DAEMON_CMD, so that we can receive on - * them both at the same time */ -#define ORTE_CKPT_CMD (orte_data_type_t) 61 /**< command flag for communicating with HNP */ -/* define the starting point for dynamically assigning data types */ -#define ORTE_DSS_ID_DYNAMIC 70 - -#endif diff --git a/orte/include/orte/types.h b/orte/include/orte/types.h new file mode 100644 index 0000000000..29b1aaffbc --- /dev/null +++ b/orte/include/orte/types.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file */ + +#ifndef ORTE_TYPES_H +#define ORTE_TYPES_H + +#include "orte_config.h" + +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#include "opal/dss/dss_types.h" + +/** + * Supported datatypes for messaging and storage operations. 
+ */ + +typedef int32_t orte_std_cntr_t; /** standard counters used in ORTE */ +#define ORTE_STD_CNTR_T OPAL_INT32 +#define ORTE_STD_CNTR_MAX INT32_MAX +#define ORTE_STD_CNTR_MIN INT32_MIN +#define ORTE_STD_CNTR_INVALID -1 + +/* + * general typedefs & structures + */ +/** Set the allowed range for ids in each space + * + * NOTE: Be sure to update the ORTE_NAME_ARGS #define (above) and all + * uses of it if these types change to be larger than (long)! The + * HTON and NTOH macros below must be updated, as well as the MIN / + * MAX macros below and the datatype packing representations in + * orte/mca/plm/base/plm_private.h + * + * NOTE: Be sure to keep the jobid and vpid types the same size! Due + * to padding rules, it won't save anything to have one larger than + * the other, and it will cause problems in the communication subsystems + */ + +typedef uint32_t orte_jobid_t; +#define ORTE_JOBID_T OPAL_UINT32 +#define ORTE_JOBID_MAX UINT32_MAX-2 +#define ORTE_JOBID_MIN 0 +typedef uint32_t orte_vpid_t; +#define ORTE_VPID_T OPAL_UINT32 +#define ORTE_VPID_MAX UINT32_MAX-2 +#define ORTE_VPID_MIN 0 + +#define ORTE_PROCESS_NAME_HTON(n) \ +do { \ + n.jobid = htonl(n.jobid); \ + n.vpid = htonl(n.vpid); \ +} while (0) + +#define ORTE_PROCESS_NAME_NTOH(n) \ +do { \ + n.jobid = ntohl(n.jobid); \ + n.vpid = ntohl(n.vpid); \ +} while (0) + +#define ORTE_NAME_ARGS(n) \ + (unsigned long) ((NULL == n) ? (unsigned long)ORTE_JOBID_INVALID : (unsigned long)(n)->jobid), \ + (unsigned long) ((NULL == n) ? 
(unsigned long)ORTE_VPID_INVALID : (unsigned long)(n)->vpid) + +/* + * define invalid values + */ +#define ORTE_JOBID_INVALID (ORTE_JOBID_MAX + 2) +#define ORTE_VPID_INVALID (ORTE_VPID_MAX + 2) + +/* + * define wildcard values + */ +#define ORTE_JOBID_WILDCARD (ORTE_JOBID_MAX + 1) +#define ORTE_VPID_WILDCARD (ORTE_VPID_MAX + 1) + +/* + * define the process name structure + */ +struct orte_process_name_t { + orte_jobid_t jobid; /**< Job number */ + orte_vpid_t vpid; /**< Process id - equivalent to rank */ +}; +typedef struct orte_process_name_t orte_process_name_t; + +/* + * define a generic id for nodes + */ +typedef int32_t orte_nodeid_t; +#define ORTE_NODEID OPAL_INT32 +#define ORTE_NODEID_WILDCARD -1 +#define ORTE_NODEID_INVALID INT32_MIN + + +/** + * handle differences in iovec + */ + +#if defined(__APPLE__) || defined(__WINDOWS__) +typedef char* orte_iov_base_ptr_t; +#else +typedef void* orte_iov_base_ptr_t; +#endif + + +/* General ORTE types - support handled within DSS */ +#define ORTE_STD_CNTR (OPAL_DSS_ID_DYNAMIC + 1) /**< standard counter type */ +/* PLM types */ + /* Name-related types */ +#define ORTE_NAME (OPAL_DSS_ID_DYNAMIC + 2) /**< an orte_process_name_t */ +#define ORTE_VPID (OPAL_DSS_ID_DYNAMIC + 3) /**< a vpid */ +#define ORTE_JOBID (OPAL_DSS_ID_DYNAMIC + 4) /**< a jobid */ + /* State-related types */ +#define ORTE_NODE_STATE (OPAL_DSS_ID_DYNAMIC + 5) /**< node status flag */ +#define ORTE_PROC_STATE (OPAL_DSS_ID_DYNAMIC + 6) /**< process/resource status */ +#define ORTE_JOB_STATE (OPAL_DSS_ID_DYNAMIC + 7) /**< job status flag */ +#define ORTE_EXIT_CODE (OPAL_DSS_ID_DYNAMIC + 8) /**< process exit code */ + /* Data-passing types */ +#define ORTE_VALUE (OPAL_DSS_ID_DYNAMIC + 9) /**< registry return value */ + /* Resource types */ +#define ORTE_APP_CONTEXT (OPAL_DSS_ID_DYNAMIC + 10) /**< argv and enviro arrays */ +#define ORTE_APP_CONTEXT_MAP (OPAL_DSS_ID_DYNAMIC + 11) /**< application context mapping array */ +#define ORTE_NODE_DESC 
(OPAL_DSS_ID_DYNAMIC + 12) /**< describes capabilities of nodes */ +#define ORTE_SLOT_DESC (OPAL_DSS_ID_DYNAMIC + 13) /**< describes slot allocations/reservations */ +#define ORTE_JOB (OPAL_DSS_ID_DYNAMIC + 14) /**< job information */ +#define ORTE_NODE (OPAL_DSS_ID_DYNAMIC + 15) /**< node information */ +#define ORTE_PROC (OPAL_DSS_ID_DYNAMIC + 16) /**< process information */ +#define ORTE_JOB_MAP (OPAL_DSS_ID_DYNAMIC + 17) /**< map of process locations */ + +/* RML types */ +#define ORTE_RML_TAG (OPAL_DSS_ID_DYNAMIC + 18) /**< tag for sending/receiving messages */ + +/* DAEMON command type */ +#define ORTE_DAEMON_CMD (OPAL_DSS_ID_DYNAMIC + 19) /**< command flag for communicating with the daemon */ + +/* GRPCOMM types */ +#define ORTE_GRPCOMM_MODE (OPAL_DSS_ID_DYNAMIC + 20) + +#endif diff --git a/orte/mca/errmgr/base/Makefile.am b/orte/mca/errmgr/base/Makefile.am index 04be5aae25..7a8dbabb5b 100644 --- a/orte/mca/errmgr/base/Makefile.am +++ b/orte/mca/errmgr/base/Makefile.am @@ -22,7 +22,6 @@ headers += \ libmca_errmgr_la_SOURCES += \ base/errmgr_base_close.c \ - base/errmgr_base_receive.c \ base/errmgr_base_select.c \ base/errmgr_base_open.c \ base/errmgr_base_fns.c diff --git a/orte/mca/errmgr/base/base.h b/orte/mca/errmgr/base/base.h index 0bf0b1c6d4..3ff4fe50d6 100644 --- a/orte/mca/errmgr/base/base.h +++ b/orte/mca/errmgr/base/base.h @@ -25,12 +25,11 @@ * includes */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/class/opal_list.h" #include "opal/mca/mca.h" -#include "orte/mca/ns/ns_types.h" #include "orte/mca/errmgr/errmgr.h" @@ -53,14 +52,11 @@ ORTE_DECLSPEC int orte_errmgr_base_close(void); * globals that might be needed */ -ORTE_DECLSPEC extern int orte_errmgr_base_output; extern bool orte_errmgr_base_selected; extern bool orte_errmgr_initialized; ORTE_DECLSPEC extern opal_list_t orte_errmgr_base_components_available; ORTE_DECLSPEC extern mca_errmgr_base_component_t 
orte_errmgr_base_selected_component; -/* make the default module available so that close can use it */ -ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr_default; /* * external API functions will be documented in the mca/errmgr/errmgr.h file */ diff --git a/orte/mca/errmgr/base/errmgr_base_close.c b/orte/mca/errmgr/base/errmgr_base_close.c index bc52a3f390..4239704b40 100644 --- a/orte/mca/errmgr/base/errmgr_base_close.c +++ b/orte/mca/errmgr/base/errmgr_base_close.c @@ -17,40 +17,38 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include -#include "orte/orte_constants.h" #include "opal/util/trace.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/base/base.h" +#include "orte/mca/errmgr/base/errmgr_private.h" int orte_errmgr_base_close(void) { OPAL_TRACE(5); - /* If we have a selected component and module, then finalize it */ - - if (orte_errmgr_base_selected) { - orte_errmgr_base_selected_component.errmgr_finalize(); - } - - /* Close all remaining available components (may be one if this is a - OMPI RTE program, or [possibly] multiple if this is ompi_info) */ - - mca_base_components_close(orte_errmgr_base_output, - &orte_errmgr_base_components_available, NULL); - + /* If we have a selected component and module, then finalize it */ + + if (orte_errmgr_base_selected) { + orte_errmgr_base_selected_component.errmgr_finalize(); + } + + /* Close all remaining available components (may be one if this is a + OMPI RTE program, or [possibly] multiple if this is ompi_info) */ + + mca_base_components_close(orte_errmgr_base_output, + &orte_errmgr_base_components_available, NULL); + orte_errmgr_initialized = false; - /* set the module back to the default so that error logging can continue */ - orte_errmgr = orte_errmgr_default; + /* All done */ - /* All done */ - - return ORTE_SUCCESS; + return ORTE_SUCCESS; } diff --git a/orte/mca/errmgr/base/errmgr_base_fns.c 
b/orte/mca/errmgr/base/errmgr_base_fns.c index 3c73ccd88f..b71000aa58 100644 --- a/orte/mca/errmgr/base/errmgr_base_fns.c +++ b/orte/mca/errmgr/base/errmgr_base_fns.c @@ -18,16 +18,21 @@ #include "orte_config.h" +#include "orte/constants.h" + #ifdef HAVE_UNISTD_H #include #endif #include -#include "orte/orte_constants.h" #include "opal/util/output.h" #include "opal/util/trace.h" -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" +#include "opal/util/error.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/util/session_dir.h" +#include "orte/mca/ess/ess.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/base/errmgr_private.h" @@ -43,39 +48,45 @@ void orte_errmgr_base_log(int error_code, char *filename, int line) } opal_output(0, "%s ORTE_ERROR_LOG: %s in file %s at line %d", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(error_code), filename, line); } -int orte_errmgr_base_proc_aborted_not_avail(orte_gpr_notify_message_t *msg) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -int orte_errmgr_base_incomplete_start_not_avail(orte_gpr_notify_message_t *msgb) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -void orte_errmgr_base_error_detected(int error_code, char *fmt, ...) -{ - /* we can't know if any output is available yet, so - * we just exit */ - exit(error_code); -} - -void orte_errmgr_base_abort(void) -{ - /* guess we should exit */ - exit(-1); -} - -int orte_errmgr_base_register_job_not_avail(orte_jobid_t job) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -int orte_errmgr_base_abort_procs_request_not_avail(orte_process_name_t *procs, orte_std_cntr_t num_procs) +void orte_errmgr_base_proc_aborted_not_avail(orte_process_name_t *name, int exit_code) +{ + return; +} + +void orte_errmgr_base_incomplete_start_not_avail(orte_jobid_t job, int exit_code) +{ + return; +} + +void orte_errmgr_base_error_abort(int error_code, char *fmt, ...) 
+{ + va_list arglist; + + /* If there was a message, output it */ + va_start(arglist, fmt); + if( NULL != fmt ) { + char* buffer = NULL; + vasprintf( &buffer, fmt, arglist ); + opal_output( 0, buffer ); + free( buffer ); + } + va_end(arglist); + + /* cleanup my session directory */ + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* abnormal exit */ + orte_ess.abort(error_code, false); +} + +int orte_errmgr_base_register_cb_not_avail(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata) { return ORTE_ERR_NOT_AVAILABLE; } diff --git a/orte/mca/errmgr/base/errmgr_base_open.c b/orte/mca/errmgr/base/errmgr_base_open.c index 1f4e65533b..06609cb5a6 100644 --- a/orte/mca/errmgr/base/errmgr_base_open.c +++ b/orte/mca/errmgr/base/errmgr_base_open.c @@ -18,7 +18,7 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" @@ -47,20 +47,17 @@ */ int orte_errmgr_base_output = -1; /* - * we must define a default module so that the error logging - * functions can be available as early as possible + * define a default module that all application procs + * can use without having to open the framework. 
The + * decision on whether or not to open the framework is + * made in orte_init */ -orte_errmgr_base_module_t orte_errmgr_default = { - orte_errmgr_base_log, +orte_errmgr_base_module_t orte_errmgr = { orte_errmgr_base_proc_aborted_not_avail, orte_errmgr_base_incomplete_start_not_avail, - orte_errmgr_base_error_detected, - orte_errmgr_base_register_job_not_avail, - orte_errmgr_base_abort, - orte_errmgr_base_abort_procs_request_not_avail + orte_errmgr_base_register_cb_not_avail, + orte_errmgr_base_error_abort }; -/* start out with a default module */ -orte_errmgr_base_module_t orte_errmgr; bool orte_errmgr_base_selected = false; opal_list_t orte_errmgr_base_components_available; @@ -90,9 +87,6 @@ int orte_errmgr_base_open(void) orte_errmgr_base_output = -1; } - /* set the default module */ - orte_errmgr = orte_errmgr_default; - /* Open up all available components */ if (ORTE_SUCCESS != diff --git a/orte/mca/errmgr/base/errmgr_base_receive.c b/orte/mca/errmgr/base/errmgr_base_receive.c deleted file mode 100644 index 4800c486e1..0000000000 --- a/orte/mca/errmgr/base/errmgr_base_receive.c +++ /dev/null @@ -1,180 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/dss/dss.h" -#include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/errmgr/base/errmgr_private.h" - -static bool recv_issued=false; - -int orte_errmgr_base_comm_start(void) -{ - int rc; - - if (recv_issued) { - return ORTE_SUCCESS; - } - - if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_ERRMGR, - ORTE_RML_PERSISTENT, - orte_errmgr_base_recv, - NULL))) { - ORTE_ERROR_LOG(rc); - } - recv_issued = true; - - return rc; -} - -int orte_errmgr_base_comm_stop(void) -{ - int rc; - - if (!recv_issued) { - return ORTE_SUCCESS; - } - - if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ERRMGR))) { - ORTE_ERROR_LOG(rc); - } - recv_issued = false; - - return rc; -} - - - -/* - * handle message from proxies - * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. 
- * DO NOT RELEASE THIS BUFFER IN THIS CODE - */ - -void orte_errmgr_base_recv(int status, orte_process_name_t* sender, - orte_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_buffer_t answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count, nprocs; - orte_process_name_t *procs; - orte_jobid_t jobid; - int rc; - - OPAL_TRACE(2); - - /* get the command */ - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* setup to return an answer */ - OBJ_CONSTRUCT(&answer, orte_buffer_t); - - /* pack the command in the answer - this is done to allow the caller to check - * that we are talking about the same command - */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - return; - } - - switch (command) { - case ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD: - /* get the number of processes */ - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &nprocs, &count, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - /* get the required space */ - procs = (orte_process_name_t*)malloc(nprocs * sizeof(orte_process_name_t)); - if (NULL == procs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - goto SEND_ANSWER; - } - - /* unpack the array of process names */ - count = nprocs; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, procs, &count, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - /* if we didn't get the number we requested, then something is wrong */ - if (count != nprocs) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto SEND_ANSWER; - } - - /* process the request */ - if (ORTE_SUCCESS != (rc = orte_errmgr.abort_procs_request(procs, nprocs))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - break; - - case ORTE_ERRMGR_REGISTER_JOB_CMD: - /* register the job to monitor for alerts */ - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &jobid, &count, 
ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - /* process the request */ - if (ORTE_SUCCESS != (rc = orte_errmgr.register_job(jobid))) { - ORTE_ERROR_LOG(rc); - goto SEND_ANSWER; - } - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); - } - -SEND_ANSWER: - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - } - - /* cleanup */ - OBJ_DESTRUCT(&answer); -} - diff --git a/orte/mca/errmgr/base/errmgr_base_select.c b/orte/mca/errmgr/base/errmgr_base_select.c index d808fbed78..73d294d7d7 100644 --- a/orte/mca/errmgr/base/errmgr_base_select.c +++ b/orte/mca/errmgr/base/errmgr_base_select.c @@ -35,7 +35,6 @@ int orte_errmgr_base_select(void) mca_base_component_list_item_t *cli; mca_errmgr_base_component_t *component, *best_component = NULL; orte_errmgr_base_module_t *module, *best_module = NULL; - bool multi, hidden; int priority, best_priority = -1; /* Iterate through all the available components */ @@ -49,7 +48,7 @@ int orte_errmgr_base_select(void) /* Call the component's init function and see if it wants to be selected */ - module = component->errmgr_init(&multi, &hidden, &priority); + module = component->errmgr_init(&priority); /* If we got a non-NULL module back, then the component wants to be selected. 
So save its multi/hidden values and save the diff --git a/orte/mca/errmgr/base/errmgr_private.h b/orte/mca/errmgr/base/errmgr_private.h index 2c1d0d4322..5a439f35ef 100644 --- a/orte/mca/errmgr/base/errmgr_private.h +++ b/orte/mca/errmgr/base/errmgr_private.h @@ -25,35 +25,32 @@ * includes */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" +#include "orte/types.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/rml/rml.h" +#include "opal/dss/dss_types.h" +#include "orte/mca/rml/rml_types.h" +#include "orte/mca/plm/plm_types.h" + +#include "orte/mca/errmgr/errmgr.h" /* * Functions for use solely within the ERRMGR framework */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /* Define the ERRMGR command flag */ typedef uint8_t orte_errmgr_cmd_flag_t; -#define ORTE_ERRMGR_CMD ORTE_UINT8 +#define ORTE_ERRMGR_CMD OPAL_UINT8 /* define some commands */ #define ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD 0x01 -#define ORTE_ERRMGR_REGISTER_JOB_CMD 0x02 +#define ORTE_ERRMGR_REGISTER_CALLBACK_CMD 0x02 -/* Internal support */ -ORTE_DECLSPEC int orte_errmgr_base_comm_start(void); -ORTE_DECLSPEC int orte_errmgr_base_comm_stop(void); -void orte_errmgr_base_recv(int status, orte_process_name_t* sender, - orte_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); - +/* provide access to verbose output channel */ +ORTE_DECLSPEC extern int orte_errmgr_base_output; + /* * Base functions @@ -61,23 +58,20 @@ void orte_errmgr_base_recv(int status, orte_process_name_t* sender, ORTE_DECLSPEC void orte_errmgr_base_log(int error_code, char *filename, int line); -ORTE_DECLSPEC int orte_errmgr_base_proc_aborted_not_avail(orte_gpr_notify_message_t *msg); +ORTE_DECLSPEC void orte_errmgr_base_proc_aborted_not_avail(orte_process_name_t *name, int exit_code); -ORTE_DECLSPEC int orte_errmgr_base_incomplete_start_not_avail(orte_gpr_notify_message_t *msg); +ORTE_DECLSPEC void 
orte_errmgr_base_incomplete_start_not_avail(orte_jobid_t job, int exit_code); -ORTE_DECLSPEC void orte_errmgr_base_error_detected(int error_code, char *fmt, ...) __opal_attribute_format__(__printf__, 2, 3); +ORTE_DECLSPEC void orte_errmgr_base_error_abort(int error_code, char *fmt, ...); -ORTE_DECLSPEC int orte_errmgr_base_register_job_not_avail(orte_jobid_t job); - -ORTE_DECLSPEC void orte_errmgr_base_abort(void) __opal_attribute_noreturn__; - -ORTE_DECLSPEC int orte_errmgr_base_abort_procs_request_not_avail(orte_process_name_t *procs, orte_std_cntr_t num_procs); +ORTE_DECLSPEC int orte_errmgr_base_register_cb_not_avail(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata); /* * external API functions will be documented in the mca/errmgr/errmgr.h file */ -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/errmgr/bproc/Makefile.am b/orte/mca/errmgr/bproc/Makefile.am deleted file mode 100644 index a5aab9b7a8..0000000000 --- a/orte/mca/errmgr/bproc/Makefile.am +++ /dev/null @@ -1,48 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include $(errmgr_bproc_CPPFLAGS) - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if OMPI_BUILD_errmgr_bproc_DSO -component_noinst = -component_install = mca_errmgr_bproc.la -else -component_noinst = libmca_errmgr_bproc.la -component_install = -endif - -sources = \ - errmgr_bproc.h \ - errmgr_bproc.c \ - errmgr_bproc_component.c - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_errmgr_bproc_la_SOURCES = $(sources) -mca_errmgr_bproc_la_LIBS = $(errmgr_bproc_LIBS) -mca_errmgr_bproc_la_LDFLAGS = -module -avoid-version $(errmgr_bproc_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_errmgr_bproc_la_SOURCES = $(sources) -libmca_errmgr_bproc_la_LIBADD = $(errmgr_bproc_LIBS) -libmca_errmgr_bproc_la_LDFLAGS = -module -avoid-version $(errmgr_bproc_LDFLAGS) diff --git a/orte/mca/errmgr/bproc/configure.m4 b/orte/mca/errmgr/bproc/configure.m4 deleted file mode 100644 index 5fc4f86287..0000000000 --- a/orte/mca/errmgr/bproc/configure.m4 +++ /dev/null @@ -1,38 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_errmgr_bproc_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_errmgr_bproc_CONFIG],[ - OMPI_CHECK_BPROC([errmgr_bproc], [errmgr_bproc_good=1], - [errmgr_bproc_good=1], [errmgr_bproc_good=0]) - - # if check worked, set wrapper flags if so. 
- # Evaluate succeed / fail - AS_IF([test "$errmgr_bproc_good" = "1"], - [errmgr_bproc_WRAPPER_EXTRA_LDFLAGS="$errmgr_bproc_LDFLAGS" - errmgr_bproc_WRAPPER_EXTRA_LIBS="$errmgr_bproc_LIBS" - $1], - [$2]) - - # set build flags to use in makefile - AC_SUBST([errmgr_bproc_CPPFLAGS]) - AC_SUBST([errmgr_bproc_LDFLAGS]) - AC_SUBST([errmgr_bproc_LIBS]) -])dnl diff --git a/orte/mca/errmgr/bproc/errmgr_bproc.c b/orte/mca/errmgr/bproc/errmgr_bproc.c deleted file mode 100644 index 6856eba6c2..0000000000 --- a/orte/mca/errmgr/bproc/errmgr_bproc.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/runtime/runtime.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/errmgr/base/errmgr_private.h" -#include "orte/mca/errmgr/bproc/errmgr_bproc.h" - -/* - * This function gets called when the SMR updates a process state to - * indicate that it aborted. Since the bproc component is only active on - * non-HNP processes, this function will NEVER be called - */ -int orte_errmgr_bproc_proc_aborted(orte_gpr_notify_message_t *msg) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when the SMR updates a process state to - * indicate that it failed to start. 
Since the bproc component is only active on - * non-HNP processes, this function will NEVER be called - */ -int orte_errmgr_bproc_incomplete_start(orte_gpr_notify_message_t *msg) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when a process detects an internal error. - * Bproc is unusually bad about letting us pass information that we - * aborted as opposed to normally terminated. There is no way to locally - * monitor the process state on a remote node, so the only thing we - * can do is pass the info back to the Bproc PLS on the HNP and let it - * figure out what to do. - */ -void orte_errmgr_bproc_error_detected(int error_code, char *fmt, ...) -{ - va_list arglist; - orte_buffer_t* cmd; - uint8_t command; - int rc; - - OPAL_TRACE(1); - - /* If there was a message, output it */ - va_start(arglist, fmt); - if( NULL != fmt ) { - char* buffer = NULL; - vasprintf( &buffer, fmt, arglist ); - opal_output( 0, buffer ); - free( buffer ); - } - va_end(arglist); - - /* Now prepare and send a message to the BProc PLS so it knows that - * we abnormally terminated. It doesn't matter what is in the - * message - the fact that it gets received is adequate - */ - command = 0x01; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return; - } - - /* just pack something */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return; - } - - /* send the alert */ - if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_BPROC_ABORT, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return; - } - OBJ_RELEASE(cmd); - - /* okay, now we can truly abort. Tell the abort function not to bother writing out - * an abort file - we can't do anything with it anyway! - */ - orte_abort(error_code, false); -} - -/* - * This function gets called when a process desperately needs to just die. 
- * Nothing can be done by definition here - this function ONLY gets - * called as an absolute last resort. - */ -void orte_errmgr_bproc_abort() -{ - /* abnormal exit - no point in writing out an abort file as bproc doesn't - * know what to do with it anyway - */ - orte_abort(-1, false); -} - -/* - * Alternatively, some systems (e.g., OpenMPI) need to tell us to kill - * some other subset of processes along with us. Send that info to the - * HNP so it can kill them. - * - * NOTE: this function assumes that the underlying ORTE infrastructure is - * still operational. Use of this function should therefore be restricted - * to cases where the problem is in a higher layer (e.g., MPI) as the - * process is likely to "hang" if an ORTE problem has been encountered. - */ -int orte_errmgr_bproc_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - /* protect us against error */ - if (NULL == procs) { - return ORTE_ERR_BAD_PARAM; - } - - command = ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the number of procs we are requesting be aborted */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &nprocs, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the array of proc names */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, procs, nprocs, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - 
OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_bproc_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - -/* - * It is imperative that ONLY an HNP perform this registration! 
- */ -int orte_errmgr_bproc_register_job(orte_jobid_t job) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - command = ORTE_ERRMGR_REGISTER_JOB_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the jobid we are requesting be monitored */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_bproc_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_REGISTER_JOB_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/bproc/errmgr_bproc.h b/orte/mca/errmgr/bproc/errmgr_bproc.h deleted file mode 100644 index 2966771003..0000000000 --- 
a/orte/mca/errmgr/bproc/errmgr_bproc.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef ORTE_ERRMGR_BPROC_H -#define ORTE_ERRMGR_BPROC_H - - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" - -#include "orte/mca/errmgr/errmgr.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_errmgr_bproc_open(void); -int orte_errmgr_bproc_close(void); - - -/* - * Startup / Shutdown - */ -orte_errmgr_base_module_t* -orte_errmgr_bproc_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); - -int orte_errmgr_bproc_finalize(void); - -/* - * globals used within the component - */ -typedef struct { - int debug; - orte_process_name_t *replica; -} orte_errmgr_bproc_globals_t; - - -extern orte_errmgr_bproc_globals_t orte_errmgr_bproc_globals; - -/* - * Component API functions - */ -int orte_errmgr_bproc_proc_aborted(orte_gpr_notify_message_t *msg); - -int orte_errmgr_bproc_incomplete_start(orte_gpr_notify_message_t *msg); - -void orte_errmgr_bproc_error_detected(int error_code, char *fmt, ...) 
__opal_attribute_format__(__printf__, 2, 3); - -void orte_errmgr_bproc_abort(void) __opal_attribute_noreturn__; - -int orte_errmgr_bproc_register_job(orte_jobid_t job); - -int orte_errmgr_bproc_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/errmgr/bproc/errmgr_bproc_component.c b/orte/mca/errmgr/bproc/errmgr_bproc_component.c deleted file mode 100644 index 712c1481be..0000000000 --- a/orte/mca/errmgr/bproc/errmgr_bproc_component.c +++ /dev/null @@ -1,165 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_bproc.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_errmgr_base_component_t mca_errmgr_bproc_component = { - { - ORTE_ERRMGR_BASE_VERSION_1_3_0, - - "bproc", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_errmgr_bproc_open, /* module open */ - orte_errmgr_bproc_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_errmgr_bproc_component_init, /* module init */ - orte_errmgr_bproc_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_errmgr_base_module_t orte_errmgr_bproc = { - orte_errmgr_base_log, - orte_errmgr_bproc_proc_aborted, - orte_errmgr_bproc_incomplete_start, - orte_errmgr_bproc_error_detected, - orte_errmgr_bproc_register_job, - orte_errmgr_bproc_abort, - orte_errmgr_bproc_abort_procs_request -}; - - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* local globals */ -orte_errmgr_bproc_globals_t orte_errmgr_bproc_globals; - -/* - * Open the component - */ -int orte_errmgr_bproc_open(void) -{ - int id, tmp; - - id = mca_base_param_register_int("errmgr", "bproc", "debug", NULL, 0); - mca_base_param_lookup_int(id, 
&tmp); - if (tmp) { - orte_errmgr_bproc_globals.debug = true; - } else { - orte_errmgr_bproc_globals.debug = false; - } - - return ORTE_SUCCESS; -} - -/* - * Close the component - */ -int orte_errmgr_bproc_close(void) -{ - return ORTE_SUCCESS; -} - -orte_errmgr_base_module_t* -orte_errmgr_bproc_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - if (orte_errmgr_bproc_globals.debug) { - opal_output(0, "errmgr_bproc_init called"); - } - - /* If we are an HNP or an orted, then don't pick us! */ - if (orte_process_info.seed || orte_process_info.daemon) { - /* don't take me! */ - return NULL; - } - - /* Return a module (choose an arbitrary, positive priority -- - absolutely must be higher than the proxy component - */ - - *priority = 100; - - /* no part of OpenRTE allows or has threads */ - - *allow_multi_user_threads = false; - *have_hidden_threads = false; - - /* define the replica for us to use - for now, just point - * to the name service replica - */ - orte_errmgr_bproc_globals.replica = orte_process_info.ns_replica; - - initialized = true; - return &orte_errmgr_bproc; -} - -/* - * finalize routine - */ -int orte_errmgr_bproc_finalize(void) -{ - if (orte_errmgr_bproc_globals.debug) { - opal_output(0, "%s errmgr_bproc_finalize called", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - initialized = false; - - /* All done */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/orted/Makefile.am b/orte/mca/errmgr/default/Makefile.am similarity index 72% rename from orte/mca/errmgr/orted/Makefile.am rename to orte/mca/errmgr/default/Makefile.am index 7c06efb6f4..a32c8bbf1e 100644 --- a/orte/mca/errmgr/orted/Makefile.am +++ b/orte/mca/errmgr/default/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - errmgr_orted.h \ - errmgr_orted_component.c \ - errmgr_orted.c + errmgr_default.h \ + errmgr_default_component.c \ + errmgr_default.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) 
or libmca__.la # (for static builds). -if OMPI_BUILD_errmgr_orted_DSO +if OMPI_BUILD_errmgr_default_DSO component_noinst = -component_install = mca_errmgr_orted.la +component_install = mca_errmgr_default.la else -component_noinst = libmca_errmgr_orted.la +component_noinst = libmca_errmgr_default.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_errmgr_orted_la_SOURCES = $(sources) -mca_errmgr_orted_la_LDFLAGS = -module -avoid-version +mca_errmgr_default_la_SOURCES = $(sources) +mca_errmgr_default_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_errmgr_orted_la_SOURCES =$(sources) -libmca_errmgr_orted_la_LDFLAGS = -module -avoid-version +libmca_errmgr_default_la_SOURCES =$(sources) +libmca_errmgr_default_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/errmgr/orted/configure.params b/orte/mca/errmgr/default/configure.params similarity index 100% rename from orte/mca/errmgr/orted/configure.params rename to orte/mca/errmgr/default/configure.params diff --git a/orte/mca/errmgr/default/errmgr_default.c b/orte/mca/errmgr/default/errmgr_default.c new file mode 100644 index 0000000000..2c708b02a9 --- /dev/null +++ b/orte/mca/errmgr/default/errmgr_default.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include + +#include "opal/class/opal_list.h" +#include "opal/util/trace.h" +#include "opal/util/output.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_wakeup.h" +#include "orte/mca/plm/plm.h" +#include "orte/util/session_dir.h" +#include "orte/util/name_fns.h" + +#include "orte/mca/errmgr/base/errmgr_private.h" +#include "errmgr_default.h" + +/* + * This function gets called by the PLM when an orted notifies us + * that a process has aborted + * Various components will follow their own strategy for dealing with + * this situation. For this component, we simply kill the job. + */ +void orte_errmgr_default_proc_aborted(orte_process_name_t *name, int exit_code) +{ + int rc; + orte_job_t **jobs; + orte_std_cntr_t i; + + OPAL_TRACE(1); + + /* if we are already in progress, then ignore this call */ + if (orte_abort_in_progress) { + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output, + "%s errmgr:default: abort in progress, ignoring proc %s aborted with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(name), exit_code)); + + return; + } + + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output, + "%s errmgr:default: proc %s aborting with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(name), exit_code)); + + /* flag that we are aborting */ + orte_abort_in_progress = true; + + /* indicate that all jobs other than the one containing this + * proc have been ordered to abort - this is necessary to avoid + * duplicate ordering of "abort". + * + * NOTE: be sure to not include the 0 job data location as this + * contains the daemons! 
+ */ + jobs = (orte_job_t**)orte_job_data->addr; + for (i=1; i < orte_job_data->size; i++) { + /* the array is left justified, so we can quit once + * we see a NULL + */ + if (NULL == jobs[i]) { + break; + } + if (ORTE_JOB_STATE_ABORTED != jobs[i]->state && + ORTE_JOB_STATE_ABORTED_BY_SIG != jobs[i]->state) { + jobs[i]->state = ORTE_JOB_STATE_ABORT_ORDERED; + } + } + + /* tell the plm to terminate all jobs */ + if (ORTE_SUCCESS != (rc = orte_plm.terminate_job(ORTE_JOBID_WILDCARD))) { + ORTE_ERROR_LOG(rc); + } + + /* wakeup orterun so we can exit */ + if (ORTE_SUCCESS != (rc = orte_wakeup(exit_code))) { + ORTE_ERROR_LOG(rc); + } +} + +/* + * This function gets called by the PLM when an orted notifies us that + * a job failed to start. + * Various components will follow their own strategy for dealing with + * this situation. For this component, we simply kill the job. + */ +void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code) +{ + int rc; + + OPAL_TRACE(1); + + /* if we are already in progress, then ignore this call */ + if (orte_abort_in_progress) { + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output, + "%s errmgr:default: abort in progress, ignoring incomplete start on job %s with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(job), exit_code)); + + return; + } + + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output, + "%s errmgr:default: job %s reported incomplete start with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(job), exit_code)); + + /* flag that we are aborting */ + orte_abort_in_progress = true; + + /* tell the plm to terminate all jobs */ + if (ORTE_SUCCESS != (rc = orte_plm.terminate_job(ORTE_JOBID_WILDCARD))) { + ORTE_ERROR_LOG(rc); + } + + /* wakeup orterun so we can exit */ + if (ORTE_SUCCESS != (rc = orte_wakeup(exit_code))) { + ORTE_ERROR_LOG(rc); + } +} + +/* + * Register a callback function upon a change to a specified job state. 
+ */ +int orte_errmgr_default_register_callback(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata) +{ + return ORTE_ERR_NOT_IMPLEMENTED; +} diff --git a/orte/mca/errmgr/hnp/errmgr_hnp.h b/orte/mca/errmgr/default/errmgr_default.h similarity index 50% rename from orte/mca/errmgr/hnp/errmgr_hnp.h rename to orte/mca/errmgr/default/errmgr_default.h index 8dc79d13e4..7338199698 100644 --- a/orte/mca/errmgr/hnp/errmgr_hnp.h +++ b/orte/mca/errmgr/default/errmgr_default.h @@ -22,60 +22,43 @@ #include "orte_config.h" -#include "orte/orte_types.h" +#include "orte/types.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/mca/plm/plm_types.h" #include "orte/mca/errmgr/errmgr.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /* * Module open / close */ -int orte_errmgr_hnp_open(void); -int orte_errmgr_hnp_close(void); +int orte_errmgr_default_open(void); +int orte_errmgr_default_close(void); /* * Startup / Shutdown */ orte_errmgr_base_module_t* -orte_errmgr_hnp_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); +orte_errmgr_default_component_init(int *priority); -int orte_errmgr_hnp_finalize(void); - -/* - * globals used within the component - */ -typedef struct { - int debug; -} orte_errmgr_hnp_globals_t; - - -extern orte_errmgr_hnp_globals_t orte_errmgr_hnp_globals; +int orte_errmgr_default_finalize(void); /* * Component API functions */ -int orte_errmgr_hnp_proc_aborted(orte_gpr_notify_message_t *msg); +void orte_errmgr_default_proc_aborted(orte_process_name_t *name, int exit_code); -int orte_errmgr_hnp_incomplete_start(orte_gpr_notify_message_t *msg); +void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code); -void orte_errmgr_hnp_error_detected(int error_code, char *fmt, ...) 
__opal_attribute_format__(__printf__, 2, 3); +int orte_errmgr_default_register_callback(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata); -void orte_errmgr_hnp_abort(void) __opal_attribute_noreturn__; +ORTE_MODULE_DECLSPEC extern mca_errmgr_base_component_t mca_errmgr_default_component; -int orte_errmgr_hnp_register_job(orte_jobid_t job); - -int orte_errmgr_hnp_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs); - -ORTE_MODULE_DECLSPEC extern mca_errmgr_base_component_t mca_errmgr_hnp_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/errmgr/default/errmgr_default_component.c b/orte/mca/errmgr/default/errmgr_default_component.c new file mode 100644 index 0000000000..51fbaf940a --- /dev/null +++ b/orte/mca/errmgr/default/errmgr_default_component.c @@ -0,0 +1,121 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + * The Open RTE Error Manager (errmgr) - default component + * + */ + +/* + * includes + */ +#include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" + +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/rml/rml.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/errmgr/base/errmgr_private.h" + +#include "errmgr_default.h" + + +/* + * Struct of function pointers that need to be initialized + */ +mca_errmgr_base_component_t mca_errmgr_default_component = { + { + ORTE_ERRMGR_BASE_VERSION_1_3_0, + + "default", /* MCA module name */ + ORTE_MAJOR_VERSION, /* MCA module major version */ + ORTE_MINOR_VERSION, /* MCA module minor version */ + ORTE_RELEASE_VERSION, /* MCA module release version */ + orte_errmgr_default_open, /* module open */ + orte_errmgr_default_close /* module close */ + }, + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + orte_errmgr_default_component_init, /* module init */ + orte_errmgr_default_finalize /* module shutdown */ +}; + +/* + * setup the function pointers for the module + */ +orte_errmgr_base_module_t orte_errmgr_default = { + orte_errmgr_default_proc_aborted, + orte_errmgr_default_incomplete_start, + orte_errmgr_default_register_callback, + orte_errmgr_base_error_abort +}; + + +/* + * Open the component + */ +int orte_errmgr_default_open(void) +{ + return ORTE_SUCCESS; +} + +/* + * Close the component + */ +int orte_errmgr_default_close(void) +{ + return ORTE_SUCCESS; +} + +orte_errmgr_base_module_t* +orte_errmgr_default_component_init(int *priority) +{ + /* If we are not an HNP, then don't pick us! */ + if (!orte_process_info.hnp) { + /* don't take me! 
*/ + return NULL; + } + + /* Return a module (choose an arbitrary, positive priority -- + it's only relevant compared to other components). */ + + *priority = 10; + + return &orte_errmgr_default; +} + +/* + * finalize routine + */ +int orte_errmgr_default_finalize(void) +{ + /* All done */ + return ORTE_SUCCESS; +} diff --git a/orte/mca/errmgr/errmgr.h b/orte/mca/errmgr/errmgr.h index a8efae07db..551e95ae88 100644 --- a/orte/mca/errmgr/errmgr.h +++ b/orte/mca/errmgr/errmgr.h @@ -29,18 +29,15 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" - - -#include "orte/mca/schema/schema.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/ns/ns_types.h" +#include "orte/constants.h" +#include "orte/types.h" #include "opal/mca/mca.h" +#include "opal/util/error.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +#include "orte/mca/plm/plm_types.h" + +BEGIN_C_DECLS /* * Macro definitions @@ -51,41 +48,40 @@ extern "C" { */ #define ORTE_ERROR_NAME(n) opal_strerror(n) - #define ORTE_ERROR_LOG(n) \ - orte_errmgr.log((n), __FILE__, __LINE__) + orte_errmgr_base_log(n, __FILE__, __LINE__) + +/** + * This is not part of any + * module so it can be used at any time! + */ +ORTE_DECLSPEC extern void orte_errmgr_base_log(int error_code, char *filename, int line); + /* * Component functions - all MUST be provided! */ -/** - * Log an error - * Log an error that occurred in the runtime environment - * - * @code - * orte_errmgr.log("this is an error", __FILE__, __LINE__); - * @endcode - */ -typedef void (*orte_errmgr_base_module_log_fn_t)(int error_code, char *filename, int line); - - /** * Alert - process aborted - * This function is called when a remote process aborts during execution. The function - * is called via the GPR's trigger notification system. Actions taken in response - * to the abnormal termination of a remote application process will vary across + * This function is called by the PLM when a remote process aborts during execution. 
Actions taken + * in response to the abnormal termination of a remote application process will vary across * the various errmgr components. - + * * NOTE: Local process errors should always be reported through the error_detected interface and * NOT here. + * + * @param *name Pointer to the name of the proc that aborted + * + * @retval ORTE_SUCCESS Whatever action that was taken was successful + * @retval ORTE_ERROR Appropriate error code */ -typedef int (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_gpr_notify_message_t *msg); +typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *name, int exit_code); /** * Alert - incomplete start of a job - * This function is called when an attempted launch of a job encounters failure of + * This function is called by the PLM when an attempted launch of a job encounters failure of * one or more processes to start. The strategy for dealing * with this "incomplete start" situation varies across the various errmgr components. * @@ -97,28 +93,16 @@ typedef int (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_gpr_notify_message * NOTE: Errmgr components on non-HNP and non-daemon processes are expressly forbidden * from taking any action to this function call. Instead, they are restricted to simply * returning. + * + * @param job Job that failed to start + * + * @retval ORTE_SUCCESS Whatever action that was taken was successful + * @retval ORTE_ERROR Appropriate error code */ -typedef int (*orte_errmgr_base_module_incomplete_start_fn_t)(orte_gpr_notify_message_t *msg); +typedef void (*orte_errmgr_base_module_incomplete_start_fn_t)(orte_jobid_t job, int exit_code); -/** - * Alert - internal error detected - * This function is called when an internal error is detected within a local process. - * It decides what to do about the error. In the case of application processes, it simply - * orders the local process to finalize and terminate. 
The abnormal termination will be - * detected and dealt with by the daemon/HNP system. - * - * HNPs, of course, cannot simply exit - they must first cleanup their running jobs if at - * all possible. In some cases, this cannot be done - e.g., if the error detected would - * prevent operation of the registry or has corrupted memory. In these extreme cases, - * nothing can really be done. - * - * Likewise, orteds have responsibility towards their local application processes and - * must make some attempt to clean them up before exiting. - * - * The function pretty prints an error message if possible. Error message should be - * specified using the standard \code printf() format. - */ -typedef void (*orte_errmgr_base_module_error_detected_fn_t)(int error_code, char *fmt, ...); +/* error manager callback function */ +typedef void (*orte_errmgr_cb_fn_t)(orte_jobid_t job, orte_job_state_t state, void *cbdata); /* * Register a job with the error manager @@ -136,43 +120,28 @@ typedef void (*orte_errmgr_base_module_error_detected_fn_t)(int error_code, char * NOTE: ONLY HNPs are allowed to register for trigger reports. All other components * MUST do nothing but return ORTE_SUCCESS. */ -typedef int (*orte_errmgr_base_module_register_job_fn_t)(orte_jobid_t job); +typedef int (*orte_errmgr_base_module_register_cb_fn_t)(orte_jobid_t job, + orte_job_state_t state, + orte_errmgr_cb_fn_t cbfunc, + void *cbdata); /** * Alert - self aborting - * This function is called when a process is aborting. It will finalize the process - * itself, and then exits - it takes no other actions. The intent here is to provide + * This function is called when a process is aborting due to some internal error. + * It will finalize the process + * itself, and then exit - it takes no other actions. The intent here is to provide * a last-ditch exit procedure that attempts to clean up a little. 
*/ -typedef void (*orte_errmgr_base_module_abort_fn_t)(void) __opal_attribute_noreturn__; +typedef void (*orte_errmgr_base_module_abort_fn_t)(int error_code, char *fmt, ...) __opal_attribute_format__(__printf__, 2, 3); -/* - * Request that the system abort processes other than myself - * The possibility exists that a process will decide that ONLY a small subset of a job - * must be aborted. This function allows a process to request that the identified - * processes be aborted. The "request" portion of the function's name is not - * by accident - this function specifically does NOT perform the abort process - * itself, but simply requests that it be done. - * - * NOTE: Please ensure that you do NOT include your own process name in the - * array or else you will be ordered to "die" before you complete this function - * (i.e., you will be held in a blocking receive pending an answer from the - * HNP, which won't come before you receive your own "die" command). If you need - * to die too, then call "abort" after completing this function call. 
- */ -typedef int (*orte_errmgr_base_module_abort_procs_request_fn_t)(orte_process_name_t *procs, orte_std_cntr_t num_procs); - /* * Ver 1.0.0 */ struct orte_errmgr_base_module_1_3_0_t { - orte_errmgr_base_module_log_fn_t log; orte_errmgr_base_module_proc_aborted_fn_t proc_aborted; orte_errmgr_base_module_incomplete_start_fn_t incomplete_start; - orte_errmgr_base_module_error_detected_fn_t error_detected; - orte_errmgr_base_module_register_job_fn_t register_job; + orte_errmgr_base_module_register_cb_fn_t register_callback; orte_errmgr_base_module_abort_fn_t abort; - orte_errmgr_base_module_abort_procs_request_fn_t abort_procs_request; }; typedef struct orte_errmgr_base_module_1_3_0_t orte_errmgr_base_module_1_3_0_t; @@ -182,10 +151,7 @@ typedef orte_errmgr_base_module_1_3_0_t orte_errmgr_base_module_t; * ERRMGR Component */ -typedef orte_errmgr_base_module_t* (*orte_errmgr_base_component_init_fn_t)( - bool *allow_multi_user_threads, - bool *have_hidden_threads, - int *priority); +typedef orte_errmgr_base_module_t* (*orte_errmgr_base_component_init_fn_t)(int *priority); typedef int (*orte_errmgr_base_component_finalize_fn_t)(void); @@ -218,8 +184,6 @@ typedef mca_errmgr_base_component_1_3_0_t mca_errmgr_base_component_t; */ ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr; /* holds selected module's function pointers */ -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/errmgr/hnp/errmgr_hnp.c b/orte/mca/errmgr/hnp/errmgr_hnp.c deleted file mode 100644 index f7844894e3..0000000000 --- a/orte/mca/errmgr/hnp/errmgr_hnp.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include -#include - -#include "opal/class/opal_list.h" -#include "opal/util/trace.h" -#include "opal/util/output.h" - -#include "orte/runtime/runtime.h" -#include "orte/runtime/params.h" -#include "orte/runtime/orte_wakeup.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/schema/schema.h" -#include "orte/dss/dss.h" -#include "orte/mca/rmgr/rmgr.h" - -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/errmgr/hnp/errmgr_hnp.h" - -/* - * This function gets called when the someone updates a process - * state to indicate it has aborted. That action results in - * the firing of a registry trigger that passes a minimal - * data message here. The only part of that message we need - * is the segment name so we can extract the jobid from it - * - * Various components will follow their own strategy for dealing with - * this situation. For this component, we simply kill the job. 
- */ -int orte_errmgr_hnp_proc_aborted(orte_gpr_notify_message_t *msg) -{ - orte_jobid_t job; - opal_list_t attrs; - opal_list_item_t *item; - int rc; - - OPAL_TRACE(1); - - opal_output(orte_errmgr_base_output, "errmgr:hnp: proc abort has been detected"); - - /* This trigger is named, so we can extract the jobid - * directly from the trigger name - */ - if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* set the job state */ - if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_ABORTED))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* tell the pls to terminate the ENTIRE FAMLIY of this job - this is necessary to avoid - * "hanging" portions of the application if the aborted job was dynamically spawned - * from another job - */ - OBJ_CONSTRUCT(&attrs, opal_list_t); - orte_rmgr.add_attribute(&attrs, ORTE_NS_USE_JOB_FAMILY, ORTE_UNDEF, NULL, ORTE_RMGR_ATTR_OVERRIDE); - if (ORTE_SUCCESS != (rc = orte_pls.terminate_job(job, &orte_abort_timeout, &attrs))) { - ORTE_ERROR_LOG(rc); - } - while (NULL != (item = opal_list_remove_first(&attrs))) OBJ_RELEASE(item); - OBJ_DESTRUCT(&attrs); - - /* orterun will only wakeup when all procs IN THE ROOT JOB report terminated. The terminate_job - * function *should* have done that - however, it is possible during abnormal - * startup that it will fail to happen. If we get here, we force the issue by - * deliberately causing the TERMINATE trigger to fire - */ - if (ORTE_SUCCESS != (rc = orte_wakeup(job))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * This function gets called when someone updates a process - * state to indicate it failed to start. That action results in - * the firing of a registry trigger that passes a minimal - * data message here. 
The only part of that message we need - * is the segment name so we can extract the jobid from it - * - * Various components will follow their own strategy for dealing with - * this situation. For this component, we simply kill the job. - */ -int orte_errmgr_hnp_incomplete_start(orte_gpr_notify_message_t *msg) -{ - orte_jobid_t job; - opal_list_t attrs; - opal_list_item_t *item; - int rc; - - OPAL_TRACE(1); - - /* This trigger is named, so we can extract the jobid - * directly from the trigger name - */ - if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - opal_output(orte_errmgr_base_output, "errmgr_hnp: incomplete start reported - job %lu", (unsigned long)job); - - /* set the job state */ - if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_FAILED_TO_START))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* tell the pls to terminate the job - kill this job and all members of its family - * as we have no way to handle it otherwise at this time - */ - OBJ_CONSTRUCT(&attrs, opal_list_t); - orte_rmgr.add_attribute(&attrs, ORTE_NS_USE_JOB_FAMILY, ORTE_UNDEF, NULL, ORTE_RMGR_ATTR_OVERRIDE); - if (ORTE_SUCCESS != (rc = orte_pls.terminate_job(job, &orte_abort_timeout, &attrs))) { - ORTE_ERROR_LOG(rc); - } - while (NULL != (item = opal_list_remove_first(&attrs))) OBJ_RELEASE(item); - OBJ_DESTRUCT(&attrs); - - /* orterun will only wakeup when all procs IN THE ROOT JOB report terminated. The terminate_job - * function *should* have done that - however, it is possible during abnormal - * startup that it will fail to happen. If we get here, we force the issue by - * deliberately causing the TERMINATE trigger to fire - */ - if (ORTE_SUCCESS != (rc = orte_wakeup(job))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * This function gets called when the HNP itself detects an internal error! 
- * Ideally, we would find some way to tell all the active jobs to die before - * we depart ourselves. Unfortunately, at this time, we aren't sure we can do - * this - later, we'll add some more intelligence by, for example, checking - * the error code to see if it's something that would allow us to alert - * the remote orteds. - * - * For now, we'll just depart! - */ -void orte_errmgr_hnp_error_detected(int error_code, char *fmt, ...) -{ - va_list arglist; - - /* If there was a message, output it */ - - va_start(arglist, fmt); - if( NULL != fmt ) { - char* buffer = NULL; - vasprintf( &buffer, fmt, arglist ); - opal_output( 0, buffer ); - free( buffer ); - } - va_end(arglist); - - /* abnormal exit */ - orte_abort(error_code, false); -} - -/* - * This function gets called when the HNP desperately needs to just die. - * Nothing can be done by definition here - this function ONLY gets - * called as an absolute last resort - */ -void orte_errmgr_hnp_abort(void) -{ - OPAL_TRACE(1); - - /* abnormal exit */ - orte_abort(-1, false); -} - -/* - * This function gets called when a process wants to request that the HNP - * abort some set of processes for it. Since this component IS for the HNP, - * that means we need to actually execute this request! Call upon the PLS - * as needed to execute the abort requests - */ -int orte_errmgr_hnp_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs) -{ - int rc; - - OPAL_TRACE(1); - - rc = ORTE_SUCCESS; - return rc; -} - -/* - * Register the HNP's errmgr functions to be called when the job encounters - * certain pre-identified problem states. - * - * NOTE: It is imperative that ONLY the HNP perform this registration! 
- */ -int orte_errmgr_hnp_register_job(orte_jobid_t job) -{ - /* we need to setup two counters and their corresponding triggers - one - * to alert us when something fails to launch, and another for when - * someone aborts - */ - int rc; - - OPAL_TRACE(1); - - /* define the ABORT trigger to fire when any process aborts */ - if (ORTE_SUCCESS != (rc = orte_smr.define_alert_monitor(job, ORTE_NUM_ABORTED_TRIGGER, - ORTE_PROC_NUM_ABORTED, 0, 1, true, - orte_errmgr_hnp_proc_aborted, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* define the FAILED_LAUNCH trigger to fire when the launch fails */ - if (ORTE_SUCCESS != (rc = orte_smr.define_alert_monitor(job, ORTE_FAILED_TO_START_TRIGGER, - ORTE_PROC_NUM_FAILED_START, 0, 1, true, - orte_errmgr_hnp_incomplete_start, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/hnp/errmgr_hnp_component.c b/orte/mca/errmgr/hnp/errmgr_hnp_component.c deleted file mode 100644 index 19772a995d..0000000000 --- a/orte/mca/errmgr/hnp/errmgr_hnp_component.c +++ /dev/null @@ -1,175 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_hnp.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_errmgr_base_component_t mca_errmgr_hnp_component = { - { - ORTE_ERRMGR_BASE_VERSION_1_3_0, - - "hnp", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_errmgr_hnp_open, /* module open */ - orte_errmgr_hnp_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_errmgr_hnp_component_init, /* module init */ - orte_errmgr_hnp_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_errmgr_base_module_t orte_errmgr_hnp = { - orte_errmgr_base_log, - orte_errmgr_hnp_proc_aborted, - orte_errmgr_hnp_incomplete_start, - orte_errmgr_hnp_error_detected, - orte_errmgr_hnp_register_job, - orte_errmgr_hnp_abort, - orte_errmgr_hnp_abort_procs_request -}; - - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* local globals */ -orte_errmgr_hnp_globals_t orte_errmgr_hnp_globals; - - -/* - * Open the component - */ -int orte_errmgr_hnp_open(void) -{ - int id, tmp; - - id = mca_base_param_register_int("errmgr", "hnp", "debug", NULL, 0); - mca_base_param_lookup_int(id, &tmp); - if (tmp) { - 
orte_errmgr_hnp_globals.debug = true; - } else { - orte_errmgr_hnp_globals.debug = false; - } - - return ORTE_SUCCESS; -} - -/* - * Close the component - */ -int orte_errmgr_hnp_close(void) -{ - return ORTE_SUCCESS; -} - -orte_errmgr_base_module_t* -orte_errmgr_hnp_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - int rc; - - if (orte_errmgr_hnp_globals.debug) { - opal_output(0, "errmgr_hnp_init called"); - } - - /* If we are not an HNP, then don't pick us! */ - if (!orte_process_info.seed) { - /* don't take me! */ - return NULL; - } - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other components). */ - - *priority = 10; - - /* no part of OpenRTE allows or has threads */ - - *allow_multi_user_threads = false; - *have_hidden_threads = false; - - /* start the receive function */ - if (ORTE_SUCCESS != (rc = orte_errmgr_base_comm_start())) { - ORTE_ERROR_LOG(rc); - return NULL; - } - - initialized = true; - return &orte_errmgr_hnp; -} - -/* - * finalize routine - */ -int orte_errmgr_hnp_finalize(void) -{ - int rc; - - if (orte_errmgr_hnp_globals.debug) { - opal_output(0, "%s errmgr_hnp_finalize called", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - /* stop the receive function */ - if (ORTE_SUCCESS != (rc = orte_errmgr_base_comm_stop())) { - ORTE_ERROR_LOG(rc); - } - - initialized = false; - - /* All done */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/orted/errmgr_orted.c b/orte/mca/errmgr/orted/errmgr_orted.c deleted file mode 100644 index bba38a6d8c..0000000000 --- a/orte/mca/errmgr/orted/errmgr_orted.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include -#include - -#include "opal/util/output.h" - -#include "orte/runtime/runtime.h" -#include "orte/runtime/orte_wait.h" -#include "orte/util/proc_info.h" -#include "orte/util/session_dir.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/errmgr/base/errmgr_private.h" -#include "orte/mca/errmgr/orted/errmgr_orted.h" - -/* - * This function only gets called on HNP components! Orteds learn about - * a proc aborting from the HNP. - */ -int orte_errmgr_orted_proc_aborted(orte_gpr_notify_message_t *msg) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -/* This function only gets called on HNP components! Orteds learn about - * an incomplete start from the HNP. - */ -int orte_errmgr_orted_incomplete_start(orte_gpr_notify_message_t *msg) -{ - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when the orted itself detects an internal error! - * At some point in future, to be polite, we tell any of our own local - * processes to die before we abandon them - */ -void orte_errmgr_orted_error_detected(int error_code, char *fmt, ...) -{ - va_list arglist; - - /* If there was a message, output it */ - - va_start(arglist, fmt); - if( NULL != fmt ) { - char* buffer = NULL; - vasprintf( &buffer, fmt, arglist ); - opal_output( 0, buffer ); - free( buffer ); - } - va_end(arglist); - - /* cleanup my session directory */ - orte_session_dir_finalize(orte_process_info.my_name); - - /* abnormal exit */ - orte_abort(error_code, false); -} - -/* - * This function gets called when we desperately need to just die. 
- * Nothing can be done by definition here - this function ONLY gets - * called as an absolute last resort - */ -void orte_errmgr_orted_abort(void) -{ - /* cleanup my session directory */ - orte_session_dir_finalize(orte_process_info.my_name); - - /* abnormal exit */ - orte_abort(-1, false); -} - -/* - * This function is called by the orted to request that some set of processes - * be aborted by the HNP. This would likely be an unusual request as the orted - * would have no knowledge of other processes or real reason to order them killed. - * Still, the capability is provided here. - */ -int orte_errmgr_orted_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - /* protect us against error */ - if (NULL == procs) { - return ORTE_ERR_BAD_PARAM; - } - - command = ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the number of procs we are requesting be aborted */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &nprocs, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the array of proc names */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, procs, nprocs, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_orted_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - 
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_orted_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - -/* - * It is imperative that ONLY an HNP perform this registration! - */ -int orte_errmgr_orted_register_job(orte_jobid_t job) -{ - return ORTE_ERR_NOT_AVAILABLE; -} diff --git a/orte/mca/errmgr/orted/errmgr_orted.h b/orte/mca/errmgr/orted/errmgr_orted.h deleted file mode 100644 index a8612c6035..0000000000 --- a/orte/mca/errmgr/orted/errmgr_orted.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef ORTE_ERRMGR_ORTED_H -#define ORTE_ERRMGR_ORTED_H - - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" - -#include "orte/mca/errmgr/errmgr.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_errmgr_orted_open(void); -int orte_errmgr_orted_close(void); - - -/* - * Startup / Shutdown - */ -orte_errmgr_base_module_t* -orte_errmgr_orted_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); - -int orte_errmgr_orted_finalize(void); - -/* - * globals used within the component - */ -typedef struct { - int debug; - orte_process_name_t *replica; -} orte_errmgr_orted_globals_t; - - -extern orte_errmgr_orted_globals_t orte_errmgr_orted_globals; - -/* - * Component API functions - */ -int orte_errmgr_orted_proc_aborted(orte_gpr_notify_message_t *msg); - -int orte_errmgr_orted_incomplete_start(orte_gpr_notify_message_t *msg); - -void orte_errmgr_orted_error_detected(int error_code, char *fmt, ...) __opal_attribute_format__(__printf__, 2, 3); - -void orte_errmgr_orted_abort(void) __opal_attribute_noreturn__; - -int orte_errmgr_orted_register_job(orte_jobid_t job); - -int orte_errmgr_orted_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs); - -ORTE_MODULE_DECLSPEC extern mca_errmgr_base_component_t mca_errmgr_orted_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/errmgr/orted/errmgr_orted_component.c b/orte/mca/errmgr/orted/errmgr_orted_component.c deleted file mode 100644 index f240ea2740..0000000000 --- a/orte/mca/errmgr/orted/errmgr_orted_component.c +++ /dev/null @@ -1,165 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_orted.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_errmgr_base_component_t mca_errmgr_orted_component = { - { - ORTE_ERRMGR_BASE_VERSION_1_3_0, - - "orted", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_errmgr_orted_open, /* module open */ - orte_errmgr_orted_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_errmgr_orted_component_init, /* module init */ - orte_errmgr_orted_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_errmgr_base_module_t orte_errmgr_orted = { - orte_errmgr_base_log, - orte_errmgr_orted_proc_aborted, - orte_errmgr_orted_incomplete_start, - orte_errmgr_orted_error_detected, - orte_errmgr_orted_register_job, - orte_errmgr_orted_abort, - orte_errmgr_orted_abort_procs_request -}; - - 
-/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* local globals */ -orte_errmgr_orted_globals_t orte_errmgr_orted_globals; - - -/* - * Open the component - */ -int orte_errmgr_orted_open(void) -{ - int id, tmp; - - id = mca_base_param_register_int("errmgr", "orted", "debug", NULL, 0); - mca_base_param_lookup_int(id, &tmp); - if (tmp) { - orte_errmgr_orted_globals.debug = true; - } else { - orte_errmgr_orted_globals.debug = false; - } - - return ORTE_SUCCESS; -} - -/* - * Close the component - */ -int orte_errmgr_orted_close(void) -{ - return ORTE_SUCCESS; -} - -orte_errmgr_base_module_t* -orte_errmgr_orted_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - if (orte_errmgr_orted_globals.debug) { - opal_output(0, "errmgr_orted_init called"); - } - - /* If we are not a daemon, then this component is not for us! */ - if (!orte_process_info.daemon) { - /* don't take me! */ - return NULL; - } - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other components). 
*/ - - *priority = 10; - - /* no part of OpenRTE allows or has threads */ - - *allow_multi_user_threads = false; - *have_hidden_threads = false; - - /* define the HNP we should be talking to - for now, - * just use the NS replica - */ - orte_errmgr_orted_globals.replica = orte_process_info.ns_replica; - - initialized = true; - return &orte_errmgr_orted; -} - -/* - * finalize routine - */ -int orte_errmgr_orted_finalize(void) -{ - if (orte_errmgr_orted_globals.debug) { - opal_output(0, "%s errmgr_orted_finalize called", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - initialized = false; - - /* All done */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/proxy/errmgr_proxy.c b/orte/mca/errmgr/proxy/errmgr_proxy.c deleted file mode 100644 index e45daf23e1..0000000000 --- a/orte/mca/errmgr/proxy/errmgr_proxy.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/runtime/runtime.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/errmgr/base/errmgr_private.h" -#include "orte/mca/errmgr/proxy/errmgr_proxy.h" - -/* - * This function gets called when the SMR updates a process state to - * indicate that it aborted. 
Since the proxy component is only active on - * non-HNP processes, this function will NEVER be called - */ -int orte_errmgr_proxy_proc_aborted(orte_gpr_notify_message_t *msg) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when the SMR updates a process state to - * indicate that it failed to start. Since the proxy component is only active on - * non-HNP processes, this function will NEVER be called - */ -int orte_errmgr_proxy_incomplete_start(orte_gpr_notify_message_t *msg) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_AVAILABLE; -} - -/* - * This function gets called when a process detects an internal error. - * Various non-HNP/non-orted errmgr components will deal with this in various - * ways - for now, we simply abort and provide the error_code as our - * exit status - */ -void orte_errmgr_proxy_error_detected(int error_code, char *fmt, ...) -{ - va_list arglist; - - OPAL_TRACE(1); - - /* If there was a message, output it */ - - va_start(arglist, fmt); - if( NULL != fmt ) { - char* buffer = NULL; - vasprintf( &buffer, fmt, arglist ); - opal_output( 0, buffer ); - free( buffer ); - } - va_end(arglist); - - orte_abort(error_code, true); -} - -/* - * This function gets called when a process desperately needs to just die. - * Nothing can be done by definition here - this function ONLY gets - * called as an absolute last resort. - */ -void orte_errmgr_proxy_abort() -{ - /* abnormal exit */ - orte_abort(-1, true); -} - -/* - * Alternatively, some systems (e.g., OpenMPI) need to tell us to kill - * some other subset of processes along with us. Send that info to the - * HNP so it can kill them. - * - * NOTE: this function assumes that the underlying ORTE infrastructure is - * still operational. Use of this function should therefore be restricted - * to cases where the problem is in a higher layer (e.g., MPI) as the - * process is likely to "hang" if an ORTE problem has been encountered. 
- */ -int orte_errmgr_proxy_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - /* protect us against error */ - if (NULL == procs) { - return ORTE_ERR_BAD_PARAM; - } - - command = ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the number of procs we are requesting be aborted */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &nprocs, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the array of proc names */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, procs, nprocs, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_proxy_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD != 
command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - -/* - * It is imperative that ONLY an HNP perform this registration! - */ -int orte_errmgr_proxy_register_job(orte_jobid_t job) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_errmgr_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - command = ORTE_ERRMGR_REGISTER_JOB_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* pack the jobid we are requesting be monitored */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - /* send the request */ - if (0 > orte_rml.send_buffer(orte_errmgr_proxy_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* setup a buffer for the answer */ - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* enter a blocking receive until we hear back */ - if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* check that this is the right command */ - if (ORTE_ERRMGR_REGISTER_JOB_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); 
- return ORTE_ERR_COMM_FAILURE; - } - - /* clean up and leave */ - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/proxy/errmgr_proxy.h b/orte/mca/errmgr/proxy/errmgr_proxy.h deleted file mode 100644 index 7d5ed8e4fe..0000000000 --- a/orte/mca/errmgr/proxy/errmgr_proxy.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef ORTE_ERRMGR_PROXY_H -#define ORTE_ERRMGR_PROXY_H - - -#include "orte_config.h" -#include "orte/orte_types.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" - -#include "orte/mca/errmgr/errmgr.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_errmgr_proxy_open(void); -int orte_errmgr_proxy_close(void); - - -/* - * Startup / Shutdown - */ -orte_errmgr_base_module_t* -orte_errmgr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); - -int orte_errmgr_proxy_finalize(void); - -/* - * globals used within the component - */ -typedef struct { - int debug; - orte_process_name_t *replica; -} orte_errmgr_proxy_globals_t; - - -extern orte_errmgr_proxy_globals_t orte_errmgr_proxy_globals; - -/* - * Component API functions - */ -int orte_errmgr_proxy_proc_aborted(orte_gpr_notify_message_t *msg); - -int orte_errmgr_proxy_incomplete_start(orte_gpr_notify_message_t *msg); - -void 
orte_errmgr_proxy_error_detected(int error_code, char *fmt, ...) __opal_attribute_format__(__printf__, 2, 3); - -void orte_errmgr_proxy_abort(void) __opal_attribute_noreturn__; - -int orte_errmgr_proxy_register_job(orte_jobid_t job); - -int orte_errmgr_proxy_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs); - -ORTE_MODULE_DECLSPEC extern mca_errmgr_base_component_t mca_errmgr_proxy_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/errmgr/proxy/errmgr_proxy_component.c b/orte/mca/errmgr/proxy/errmgr_proxy_component.c deleted file mode 100644 index 11953f9dd4..0000000000 --- a/orte/mca/errmgr/proxy/errmgr_proxy_component.c +++ /dev/null @@ -1,164 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_proxy.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_errmgr_base_component_t mca_errmgr_proxy_component = { - { - ORTE_ERRMGR_BASE_VERSION_1_3_0, - - "proxy", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_errmgr_proxy_open, /* module open */ - orte_errmgr_proxy_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_errmgr_proxy_component_init, /* module init */ - orte_errmgr_proxy_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_errmgr_base_module_t orte_errmgr_proxy = { - orte_errmgr_base_log, - orte_errmgr_proxy_proc_aborted, - orte_errmgr_proxy_incomplete_start, - orte_errmgr_proxy_error_detected, - orte_errmgr_proxy_register_job, - orte_errmgr_proxy_abort, - orte_errmgr_proxy_abort_procs_request -}; - - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* local globals */ -orte_errmgr_proxy_globals_t orte_errmgr_proxy_globals; - -/* - * Open the component - */ -int orte_errmgr_proxy_open(void) -{ - int id, tmp; - - id = mca_base_param_register_int("errmgr", "proxy", "debug", NULL, 0); - mca_base_param_lookup_int(id, 
&tmp); - if (tmp) { - orte_errmgr_proxy_globals.debug = true; - } else { - orte_errmgr_proxy_globals.debug = false; - } - - return ORTE_SUCCESS; -} - -/* - * Close the component - */ -int orte_errmgr_proxy_close(void) -{ - return ORTE_SUCCESS; -} - -orte_errmgr_base_module_t* -orte_errmgr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - if (orte_errmgr_proxy_globals.debug) { - opal_output(0, "errmgr_proxy_init called"); - } - - /* If we are an HNP or an orted, then don't pick us! */ - if (orte_process_info.seed || orte_process_info.daemon) { - /* don't take me! */ - return NULL; - } - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other components). */ - - *priority = 10; - - /* no part of OpenRTE allows or has threads */ - - *allow_multi_user_threads = false; - *have_hidden_threads = false; - - /* define the replica for us to use - for now, just point - * to the name service replica - */ - orte_errmgr_proxy_globals.replica = orte_process_info.ns_replica; - - initialized = true; - return &orte_errmgr_proxy; -} - -/* - * finalize routine - */ -int orte_errmgr_proxy_finalize(void) -{ - if (orte_errmgr_proxy_globals.debug) { - opal_output(0, "%s errmgr_proxy_finalize called", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - initialized = false; - - /* All done */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sds/Makefile.am b/orte/mca/ess/Makefile.am similarity index 86% rename from orte/mca/sds/Makefile.am rename to orte/mca/ess/Makefile.am index 3b481a496d..7a251654bf 100644 --- a/orte/mca/sds/Makefile.am +++ b/orte/mca/ess/Makefile.am @@ -19,21 +19,21 @@ AM_CPPFLAGS = $(LTDLINCL) # main library setup -noinst_LTLIBRARIES = libmca_sds.la -libmca_sds_la_SOURCES = +noinst_LTLIBRARIES = libmca_ess.la +libmca_ess_la_SOURCES = # header setup nobase_orte_HEADERS = dist_pkgdata_DATA = # local files -headers = sds.h -libmca_sds_la_SOURCES += $(headers) +headers = 
ess.h +libmca_ess_la_SOURCES += $(headers) # Conditionally install the header files if WANT_INSTALL_HEADERS nobase_orte_HEADERS += $(headers) -ortedir = $(includedir)/openmpi/orte/mca/sds +ortedir = $(includedir)/openmpi/orte/mca/ess else ortedir = $(includedir) endif diff --git a/orte/mca/sds/alps/Makefile.am b/orte/mca/ess/alps/Makefile.am similarity index 73% rename from orte/mca/sds/alps/Makefile.am rename to orte/mca/ess/alps/Makefile.am index e3e128a998..9c2f3a31f5 100644 --- a/orte/mca/sds/alps/Makefile.am +++ b/orte/mca/ess/alps/Makefile.am @@ -9,7 +9,6 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008 UT-Battelle, LLC # $COPYRIGHT$ # # Additional copyrights may follow @@ -22,30 +21,30 @@ sources = \ - sds_alps.h \ - sds_alps_component.c \ - sds_alps_module.c + ess_alps.h \ + ess_alps_component.c \ + ess_alps_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_sds_alps_DSO +if OMPI_BUILD_ess_alps_DSO component_noinst = -component_install = mca_sds_alps.la +component_install = mca_ess_alps.la else -component_noinst = libmca_sds_alps.la +component_noinst = libmca_ess_alps.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sds_alps_la_SOURCES = $(sources) -mca_sds_alps_la_LDFLAGS = -module -avoid-version -mca_sds_alps_la_LIBADD = \ +mca_ess_alps_la_SOURCES = $(sources) +mca_ess_alps_la_LDFLAGS = -module -avoid-version +mca_ess_alps_la_LIBADD = \ $(top_ompi_builddir)/orte/libopen-rte.la \ $(top_ompi_builddir)/opal/libopen-pal.la noinst_LTLIBRARIES = $(component_noinst) -libmca_sds_alps_la_SOURCES =$(sources) -libmca_sds_alps_la_LDFLAGS = -module -avoid-version +libmca_ess_alps_la_SOURCES =$(sources) +libmca_ess_alps_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sds/alps/configure.m4 b/orte/mca/ess/alps/configure.m4 similarity index 83% rename from orte/mca/sds/alps/configure.m4 rename to orte/mca/ess/alps/configure.m4 index 417cbd36d4..6b4187c36b 100644 --- a/orte/mca/sds/alps/configure.m4 +++ b/orte/mca/ess/alps/configure.m4 @@ -10,7 +10,6 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
-# Copyright (c) 2008 UT-Battelle, LLC # $COPYRIGHT$ # # Additional copyrights may follow @@ -18,10 +17,10 @@ # $HEADER$ # -# MCA_sds_alps_CONFIG([action-if-found], [action-if-not-found]) +# MCA_ess_alps_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_sds_alps_CONFIG],[ - OMPI_CHECK_ALPS([sds_alps], +AC_DEFUN([MCA_ess_alps_CONFIG],[ + OMPI_CHECK_ALPS([ess_alps], [AC_CHECK_FUNC([cnos_get_rank], [$1], [$2])], [$2]) ])dnl diff --git a/orte/mca/ess/alps/configure.params b/orte/mca/ess/alps/configure.params new file mode 100644 index 0000000000..93a965d5a9 --- /dev/null +++ b/orte/mca/ess/alps/configure.params @@ -0,0 +1,30 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that, if we can build, +# only ALPS component will build. 
This is set higher +# than the CNOS component to ensure we don't get both +# since the ALPS component will -only- build if specifically +# ordered to do so - which means we don't want the CNOS one + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/oob/oob_types.h b/orte/mca/ess/alps/ess_alps.h similarity index 72% rename from orte/mca/oob/oob_types.h rename to orte/mca/ess/alps/ess_alps.h index 3e502cf4b2..305bc34f69 100644 --- a/orte/mca/oob/oob_types.h +++ b/orte/mca/ess/alps/ess_alps.h @@ -15,17 +15,20 @@ * * $HEADER$ */ -/** @file: - * - * the oob framework + +#ifndef ORTE_ESS_ALPS_H +#define ORTE_ESS_ALPS_H + +BEGIN_C_DECLS + +/* + * Module open / close */ +int orte_ess_alps_component_open(void); +int orte_ess_alps_component_close(void); +orte_ess_base_module_t* orte_ess_alps_component_init(int *priority); -#ifndef _MCA_OOB_TYPES_H_ -#define _MCA_OOB_TYPES_H_ -#include "orte_config.h" -#include "orte/orte_constants.h" +END_C_DECLS -#define ORTE_OOB_SUBSCRIPTION "orte-oob-sub" - -#endif /* MCA_OOB_TYPES_H */ +#endif /* ORTE_ESS_ALPS_H */ diff --git a/orte/mca/sds/singleton/sds_singleton_component.c b/orte/mca/ess/alps/ess_alps_component.c similarity index 69% rename from orte/mca/sds/singleton/sds_singleton_component.c rename to orte/mca/ess/alps/ess_alps_component.c index 95d4eba247..1543029aab 100644 --- a/orte/mca/sds/singleton/sds_singleton_component.c +++ b/orte/mca/ess/alps/ess_alps_component.c @@ -23,65 +23,66 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/singleton/sds_singleton.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_singleton_module; +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/alps/ess_alps.h" + +extern orte_ess_base_module_t orte_ess_alps_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t 
mca_sds_singleton_component = { +orte_ess_base_component_t mca_ess_alps_component = { /* First, the mca_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ - "singleton", + "alps", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_singleton_component_open, - orte_sds_singleton_component_close + orte_ess_alps_component_open, + orte_ess_alps_component_close }, /* Next the MCA v1.0.0 component meta data */ { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT + /* The component is not checkpoint ready */ + MCA_BASE_METADATA_PARAM_NONE }, /* Initialization / querying functions */ - orte_sds_singleton_component_init + orte_ess_alps_component_init }; int -orte_sds_singleton_component_open(void) +orte_ess_alps_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_singleton_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_alps_component_init(int *priority) { - *priority = 0; - return &orte_sds_singleton_module; + *priority = 35; + return &orte_ess_alps_module; } int -orte_sds_singleton_component_close(void) +orte_ess_alps_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c new file mode 100644 index 0000000000..70e22fd232 --- /dev/null +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "opal/util/show_help.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/alps/ess_alps.h" + +static int alps_set_name(void); + +static int rte_init(char flags); +static int rte_finalize(void); + + +orte_ess_base_module_t orte_ess_alps_module = { + rte_init, + rte_finalize, + orte_ess_base_app_abort +}; + + +static int rte_init(char flags) +{ + int ret; + char *error = NULL; + + /* Start by getting a unique name */ + alps_set_name(); + + /* if I am a daemon, complete my setup using the + * default procedure + */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_orted_setup"; + goto error; + } + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_tool_setup"; + goto error; + } + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_app_setup"; + goto error; + } + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +static int rte_finalize(void) +{ + int ret; + + /* if I am a daemon, 
finalize using the default procedure */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { + ORTE_ERROR_LOG(ret); + } + } + + return ret; +} + + +static int alps_set_name(void) +{ + int rc; + int id; + orte_jobid_t jobid; + orte_vpid_t starting_vpid; + char* jobid_string; + char* vpid_string; + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, + "ess:alps setting name")); + + id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL); + mca_base_param_lookup_string(id, &jobid_string); + if (NULL == jobid_string) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, jobid_string))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL); + mca_base_param_lookup_string(id, &vpid_string); + if (NULL == vpid_string) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&starting_vpid, vpid_string))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + ORTE_PROC_MY_NAME->jobid = jobid; + ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank() + starting_vpid; + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, + "ess:alps set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size(); + + return ORTE_SUCCESS; +} diff --git a/orte/mca/sds/base/Makefile.am b/orte/mca/ess/base/Makefile.am similarity index 72% rename from orte/mca/sds/base/Makefile.am rename to 
orte/mca/ess/base/Makefile.am index 087575182f..790466afba 100644 --- a/orte/mca/sds/base/Makefile.am +++ b/orte/mca/ess/base/Makefile.am @@ -16,16 +16,18 @@ # $HEADER$ # -dist_pkgdata_DATA += base/help-sds-base.txt +dist_pkgdata_DATA += base/help-ess-base.txt headers += \ base/base.h -libmca_sds_la_SOURCES += \ - base/sds_base_close.c \ - base/sds_base_open.c \ - base/sds_base_select.c \ - base/sds_base_interface.c \ - base/sds_base_universe.c \ - base/sds_base_get.c \ - base/sds_base_put.c +libmca_ess_la_SOURCES += \ + base/ess_base_close.c \ + base/ess_base_open.c \ + base/ess_base_select.c \ + base/ess_base_get.c \ + base/ess_base_put.c \ + base/ess_base_std_tool.c \ + base/ess_base_std_app.c \ + base/ess_base_std_orted.c + diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h new file mode 100644 index 0000000000..e4a5044b6b --- /dev/null +++ b/orte/mca/ess/base/base.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef MCA_ESS_BASE_H +#define MCA_ESS_BASE_H + +#include "orte_config.h" +#include "orte/types.h" + +#include "opal/mca/mca.h" +#include "orte/mca/ess/ess.h" + +BEGIN_C_DECLS + +/* + * Global functions for MCA overall collective open and close + */ + +/** + * Open the ess framework + */ +ORTE_DECLSPEC int orte_ess_base_open(void); + +/** + * Select a ess module + */ +ORTE_DECLSPEC int orte_ess_base_select(void); + +/** + * Close the ess framework + */ +ORTE_DECLSPEC int orte_ess_base_close(void); + +/* + * The verbose channel for debug output + */ +ORTE_DECLSPEC extern int orte_ess_base_output; + +/* + * Internal helper functions used by components + */ +ORTE_DECLSPEC int orte_ess_env_get(void); + +ORTE_DECLSPEC int orte_ess_base_app_setup(void); +ORTE_DECLSPEC int orte_ess_base_app_finalize(void); +ORTE_DECLSPEC void orte_ess_base_app_abort(int status, bool report) __opal_attribute_noreturn__; + +ORTE_DECLSPEC int orte_ess_base_tool_setup(void); +ORTE_DECLSPEC int orte_ess_base_tool_finalize(void); + +ORTE_DECLSPEC int orte_ess_base_orted_setup(void); +ORTE_DECLSPEC int orte_ess_base_orted_finalize(void); + + +/* + * Put functions + */ +ORTE_DECLSPEC int orte_ess_env_put(orte_std_cntr_t num_procs, + orte_std_cntr_t num_local_procs, + char ***env); + +ORTE_DECLSPEC extern opal_list_t orte_ess_base_components_available; + +END_C_DECLS + +#endif diff --git a/orte/mca/ess/base/ess_base_close.c b/orte/mca/ess/base/ess_base_close.c new file mode 100644 index 0000000000..29544b05b3 --- /dev/null +++ b/orte/mca/ess/base/ess_base_close.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_component_repository.h" +#include "opal/util/output.h" + +#include "orte/mca/ess/base/base.h" + +extern opal_list_t orte_ess_base_components_available; + +int +orte_ess_base_close(void) +{ + opal_list_item_t *item; + mca_base_component_list_item_t *cli; + + /* unload all remaining components */ + while (NULL != (item = opal_list_remove_first(&orte_ess_base_components_available))) { + orte_ess_base_component_t* component; + cli = (mca_base_component_list_item_t *) item; + component = (orte_ess_base_component_t *) cli->cli_component; + opal_output_verbose(10, 0, + "orte_ess_base_close: module %s unloaded", + component->ess_version.mca_component_name); + mca_base_component_repository_release((mca_base_component_t *) component); + OBJ_RELEASE(item); + } + + OBJ_DESTRUCT(&orte_ess_base_components_available); + return ORTE_SUCCESS; +} + diff --git a/orte/mca/sds/base/sds_base_get.c b/orte/mca/ess/base/ess_base_get.c similarity index 63% rename from orte/mca/sds/base/sds_base_get.c rename to orte/mca/ess/base/ess_base_get.c index c0de6b8bb8..6f0a479158 100644 --- a/orte/mca/sds/base/sds_base_get.c +++ b/orte/mca/ess/base/ess_base_get.c @@ -17,6 +17,7 @@ */ #include "orte_config.h" +#include "orte/constants.h" #ifdef HAVE_UNISTD_H #include @@ -27,21 +28,20 @@ #include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/mca/base/mca_base_param.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/base/base.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/ns/ns.h" -#include 
"orte/mca/errmgr/base/base.h" -int orte_sds_env_get(void) +#include "orte/mca/errmgr/errmgr.h" +#include "orte/util/proc_info.h" + +#include "orte/mca/ess/base/base.h" + +int orte_ess_env_get(void) { int num_procs; - int local_rank; - int num_local_procs; - int id; - id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1); - mca_base_param_lookup_int(id, &num_procs); + mca_base_param_reg_int_name("orte", "ess_num_procs", + "Used to discover the number of procs in the job", + true, false, -1, &num_procs); + if (num_procs < 0) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; @@ -52,17 +52,19 @@ int orte_sds_env_get(void) * to set it for orteds - so just set it to an invalid value which indicates * it wasn't found if it isn't there */ - id = mca_base_param_register_int("ns", "nds", "local_rank", NULL, ORTE_VPID_INVALID); - mca_base_param_lookup_int(id, &local_rank); - orte_process_info.local_rank = (orte_vpid_t)local_rank; + mca_base_param_reg_int_name("orte", "ess_local_rank", + "Used to discover the local rank of a process on a node", + true, false, (int)ORTE_VPID_INVALID, &num_procs); + orte_process_info.local_rank = (orte_vpid_t)num_procs; /* it is okay for this param not to be found - for example, we don't bother * to set it for orteds - so just set it to a value which indicates * it wasn't found if it isn't there */ - id = mca_base_param_register_int("ns", "nds", "num_local_procs", NULL, 0); - mca_base_param_lookup_int(id, &num_local_procs); - orte_process_info.num_local_procs = (orte_std_cntr_t)num_local_procs; + mca_base_param_reg_int_name("orte", "ess_num_local_procs", + "Used to discover the number of processes on a node", + true, false, -1, &num_procs); + orte_process_info.num_local_procs = (orte_std_cntr_t)num_procs; return ORTE_SUCCESS; } diff --git a/orte/mca/sds/base/sds_base_open.c b/orte/mca/ess/base/ess_base_open.c similarity index 68% rename from orte/mca/sds/base/sds_base_open.c rename to 
orte/mca/ess/base/ess_base_open.c index d3fbc1700c..2238a3a818 100644 --- a/orte/mca/sds/base/sds_base_open.c +++ b/orte/mca/ess/base/ess_base_open.c @@ -18,11 +18,13 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" + #include "opal/util/output.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" -#include "orte/mca/sds/base/base.h" + +#include "orte/mca/ess/base/base.h" /* @@ -31,20 +33,26 @@ * module's public mca_base_module_t struct. */ -#include "orte/mca/sds/base/static-components.h" +#include "orte/mca/ess/base/static-components.h" -opal_list_t orte_sds_base_components_available; -orte_sds_base_module_t *orte_sds_base_module = NULL; +opal_list_t orte_ess_base_components_available; +orte_ess_base_module_t orte_ess = { + NULL, + NULL +}; +int orte_ess_base_output; int -orte_sds_base_open(void) +orte_ess_base_open(void) { - OBJ_CONSTRUCT(&orte_sds_base_components_available, opal_list_t); + orte_ess_base_output = opal_output_open(NULL); + + OBJ_CONSTRUCT(&orte_ess_base_components_available, opal_list_t); /* Open up all available components */ if (ORTE_SUCCESS != - mca_base_components_open("sds", 0, mca_sds_base_static_components, - &orte_sds_base_components_available, + mca_base_components_open("ess", orte_ess_base_output, mca_ess_base_static_components, + &orte_ess_base_components_available, true)) { return ORTE_ERROR; } diff --git a/orte/mca/ess/base/ess_base_put.c b/orte/mca/ess/base/ess_base_put.c new file mode 100644 index 0000000000..805976f712 --- /dev/null +++ b/orte/mca/ess/base/ess_base_put.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. 
All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#include "opal/util/opal_environ.h" +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/ess/base/base.h" + +int orte_ess_env_put(orte_std_cntr_t num_procs, + orte_std_cntr_t num_local_procs, + char ***env) +{ + char* param; + char* value; + + /* tell the SDS to select the env component */ + if(NULL == (param = mca_base_param_environ_variable("ess",NULL,NULL))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + opal_setenv(param, "env", true, env); + free(param); + + /* since we want to pass the name as separate components, make sure + * that the "name" environmental variable is cleared! + */ + if(NULL == (param = mca_base_param_environ_variable("orte","ess","name"))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + opal_unsetenv(param, env); + free(param); + + asprintf(&value, "%ld", (long) num_procs); + if(NULL == (param = mca_base_param_environ_variable("orte","ess","num_procs"))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + opal_setenv(param, value, true, env); + free(param); + /* although the num_procs is the comm_world size, users + * would appreciate being given a public environmental variable + * that also represents this value - something MPI specific - so + * do that here. + * + * AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT. 
+ * We know - just live with it + */ + opal_setenv("MPI_COMM_WORLD_SIZE", value, true, env); + free(value); + + asprintf(&value, "%ld", (long) num_local_procs); + if(NULL == (param = mca_base_param_environ_variable("orte","ess","num_local_procs"))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + opal_setenv(param, value, true, env); + free(param); + free(value); + + return ORTE_SUCCESS; +} diff --git a/orte/mca/sds/base/sds_base_select.c b/orte/mca/ess/base/ess_base_select.c similarity index 57% rename from orte/mca/sds/base/sds_base_select.c rename to orte/mca/ess/base/ess_base_select.c index 28e38c5179..82b09e0282 100644 --- a/orte/mca/sds/base/sds_base_select.c +++ b/orte/mca/ess/base/ess_base_select.c @@ -17,53 +17,53 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include "opal/class/opal_list.h" #include "opal/util/output.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_component_repository.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/base/base.h" -extern opal_list_t orte_sds_base_components_available; -extern orte_sds_base_module_t *orte_sds_base_module; +#include "orte/mca/ess/base/base.h" + +extern opal_list_t orte_ess_base_components_available; int -orte_sds_base_select(void) +orte_ess_base_select(void) { opal_list_item_t *item; mca_base_component_list_item_t *cli; int selected_priority = -1; - orte_sds_base_component_t *selected_component = NULL; - orte_sds_base_module_t *selected_module = NULL; + orte_ess_base_component_t *selected_component = NULL; + orte_ess_base_module_t *selected_module = NULL; /* Traverse the list of opened modules; call their init functions. 
*/ - for(item = opal_list_get_first(&orte_sds_base_components_available); - item != opal_list_get_end(&orte_sds_base_components_available); + for(item = opal_list_get_first(&orte_ess_base_components_available); + item != opal_list_get_end(&orte_ess_base_components_available); item = opal_list_get_next(item)) { - orte_sds_base_component_t* component; + orte_ess_base_component_t* component; cli = (mca_base_component_list_item_t *) item; - component = (orte_sds_base_component_t *) cli->cli_component; + component = (orte_ess_base_component_t *) cli->cli_component; - opal_output_verbose(10, 0, - "orte_sds_base_select: initializing %s component %s", - component->sds_version.mca_type_name, - component->sds_version.mca_component_name); + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: initializing %s component %s", + component->ess_version.mca_type_name, + component->ess_version.mca_component_name); - if (NULL == component->sds_init) { - opal_output_verbose(10, 0, - "orte_sds_base_select: no init function; ignoring component"); + if (NULL == component->ess_init) { + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: no init function; ignoring component"); } else { int priority; - orte_sds_base_module_t* module = component->sds_init(&priority); + orte_ess_base_module_t* module = component->ess_init(&priority); /* If the component didn't initialize, remove it from the opened list and remove it from the component repository */ if (NULL == module) { - opal_output_verbose(10, 0, - "orte_sds_base_select: init returned failure"); + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: init returned failure"); continue; } @@ -80,18 +80,18 @@ orte_sds_base_select(void) } /* unload all components that were not selected */ - item = opal_list_get_first(&orte_sds_base_components_available); - while(item != opal_list_get_end(&orte_sds_base_components_available)) { + item = opal_list_get_first(&orte_ess_base_components_available); 
+ while(item != opal_list_get_end(&orte_ess_base_components_available)) { opal_list_item_t* next = opal_list_get_next(item); - orte_sds_base_component_t* component; + orte_ess_base_component_t* component; cli = (mca_base_component_list_item_t *) item; - component = (orte_sds_base_component_t *) cli->cli_component; + component = (orte_ess_base_component_t *) cli->cli_component; if(component != selected_component) { - opal_output_verbose(10, 0, - "orte_sds_base_select: module %s unloaded", - component->sds_version.mca_component_name); + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: module %s unloaded", + component->ess_version.mca_component_name); mca_base_component_repository_release((mca_base_component_t *) component); - opal_list_remove_item(&orte_sds_base_components_available, item); + opal_list_remove_item(&orte_ess_base_components_available, item); OBJ_RELEASE(item); } item = next; @@ -99,7 +99,10 @@ orte_sds_base_select(void) /* setup reference to selected module */ if(NULL != selected_module) { - orte_sds_base_module = selected_module; + opal_output_verbose(5, orte_ess_base_output, + "orte_ess_base_select: module %s selected", + selected_component->ess_version.mca_component_name); + orte_ess = *selected_module; } return ORTE_SUCCESS; } diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c new file mode 100644 index 0000000000..16c2110817 --- /dev/null +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. 
+ * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/util/os_path.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_cr.h" + +#include "orte/mca/rml/base/base.h" +#include "orte/mca/routed/base/base.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/grpcomm/base/base.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/filem/base/base.h" +#include "orte/mca/iof/base/base.h" +#if OPAL_ENABLE_FT == 1 +#include "orte/mca/snapc/base/base.h" +#endif +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/util/session_dir.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_cr.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_wait.h" + +#include "orte/mca/ess/base/base.h" + +int orte_ess_base_app_setup(void) +{ + int ret; + char *error = NULL; + char *jobid_str, *procid_str; + + /* Setup the communication infrastructure */ + + /* Runtime Messaging Layer */ + if (ORTE_SUCCESS != (ret = orte_rml_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_select"; + goto error; + } + /* Routed system */ + if (ORTE_SUCCESS != (ret = orte_routed_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_select"; + goto error; + } + + /* + * Group communications + */ + if (ORTE_SUCCESS != (ret = orte_grpcomm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_open"; + goto error; + } + if 
(ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_select"; + goto error; + } + + /* although only the HNP and orteds open/select the PLM, everyone + * else has access to the PLM env proxy. + * We now provide a chance for the PLM + * to perform any module-specific init functions - non-HNP/orted + * procs will simply perform the PLM proxy init + */ + if (ORTE_SUCCESS != (ret = orte_plm.init())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_init"; + goto error; + } + + /* enable communication via the rml */ + if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml.enable_comm"; + goto error; + } + + /* setup my session directory */ + if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobid_str, ORTE_PROC_MY_NAME->jobid))) { + ORTE_ERROR_LOG(ret); + error = "orte_convert_jobid_to_string"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_util_convert_vpid_to_string(&procid_str, ORTE_PROC_MY_NAME->vpid))) { + ORTE_ERROR_LOG(ret); + error = "orte_convert_vpid_to_string"; + goto error; + } + + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, + "%s setting up session dir with\n\ttmpdir: %s\n\tuser %s\n\thost %s\n\tjobid %s\n\tprocid %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == orte_process_info.tmpdir_base) ? 
"UNDEF" : orte_process_info.tmpdir_base, + orte_system_info.user, orte_system_info.nodename, jobid_str, procid_str)); + + if (ORTE_SUCCESS != (ret = orte_session_dir(true, + orte_process_info.tmpdir_base, + orte_system_info.user, + orte_system_info.nodename, NULL, + jobid_str, procid_str))) { + if (jobid_str != NULL) free(jobid_str); + if (procid_str != NULL) free(procid_str); + ORTE_ERROR_LOG(ret); + error = "orte_session_dir"; + goto error; + } + if (NULL != jobid_str) { + free(jobid_str); + } + if (NULL != procid_str) { + free(procid_str); + } + + /* Once the session directory location has been established, set + the opal_output env file location to be in the + proc-specific session directory. */ + opal_output_set_output_file_info(orte_process_info.proc_session_dir, + "output-", NULL, NULL); + + + /* setup the routed info - the selected routed component + * will know what to do. Some may put us in a blocking + * receive here so they can get ALL of the contact info + * from our peers. Others may just find the local daemon's + * contact info and immediately return. 
+ */ + if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_routed.init_routes"; + goto error; + } + + /* + * setup I/O forwarding system - must come after we init routes + * so we can get our HNP's name set + */ + if (ORTE_SUCCESS != (ret = orte_iof_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_iof_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_select"; + goto error; + } + + +#if OPAL_ENABLE_FT == 1 + /* + * Setup the SnapC + */ + if (ORTE_SUCCESS != (ret = orte_snapc_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_select"; + goto error; + } + + /* apps need the OPAL CR stuff */ + opal_cr_set_enabled(true); +#else + opal_cr_set_enabled(false); +#endif + + /* Initalize the CR setup + * Note: Always do this, even in non-FT builds. + * If we don't some user level tools may hang. 
+ */ + if (ORTE_SUCCESS != (ret = orte_cr_init())) { + ORTE_ERROR_LOG(ret); + error = "orte_cr_init"; + goto error; + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +int orte_ess_base_app_finalize(void) +{ + orte_cr_finalize(); + +#if OPAL_ENABLE_FT == 1 + orte_snapc_base_close(); +#endif + orte_filem_base_close(); + + orte_wait_finalize(); + orte_iof_base_close(); + + /* now can close the rml and its friendly group comm */ + orte_grpcomm_base_close(); + orte_routed_base_close(); + orte_rml_base_close(); + + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} + +/* + * We do NOT call the regular C-library "abort" function, even + * though that would have alerted us to the fact that this is + * an abnormal termination, because it would automatically cause + * a core file to be generated. On large systems, that can be + * overwhelming (imagine a few thousand Gbyte-sized files hitting + * a shared file system simultaneously...ouch!). + * + * However, this causes a problem for OpenRTE as the system truly + * needs to know that this actually IS an abnormal termination. + * To get around the problem, we create a file in the session + * directory - we don't need to put anything in it, though, as its + * very existence simply alerts us that this was an abnormal + * termination. + * + * The session directory finalize system will clean this file up + * for us automagically. However, it needs to stick around long + * enough for our local daemon to find it! So, we do NOT call + * session_dir_finalize here!!! Someone will clean up for us. + * + * In some cases, however, we DON'T want to create that alert. For + * example, if an orted detects that the HNP has died, then there + * is truly nobody to alert! 
In these cases, we pass report=false + * to prevent the abort file from being created. This allows the + * session directory tree to cleanly be eliminated. + */ +void orte_ess_base_app_abort(int status, bool report) +{ + char *abort_file; + int fd; + + /* Exit - do NOT do a normal finalize as this will very likely + * hang the process. We are aborting due to an abnormal condition + * that precludes normal cleanup + * + * We do need to do the following bits to make sure we leave a + * clean environment. Taken from orte_finalize(): + * - Assume errmgr cleans up child processes before we exit. + */ + + /* CRS cleanup since it may have a named pipe and thread active */ + orte_cr_finalize(); + + /* If we were asked to report this termination, + * write an "abort" file into our session directory + */ + if (report) { + abort_file = opal_os_path(false, orte_process_info.proc_session_dir, "abort", NULL); + if (NULL == abort_file) { + /* got a problem */ + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + goto CLEANUP; + } + OPAL_OUTPUT_VERBOSE((5, orte_debug_output, + "%s orte_ess_app_abort: dropping abort file %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), abort_file)); + fd = open(abort_file, O_CREAT, 0600); + if (0 < fd) close(fd); + } + +CLEANUP: + /* - Clean out the global structures + * (not really necessary, but good practice) */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + /* Now Exit */ + exit(status); +} diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c new file mode 100644 index 0000000000..7b14e1961f --- /dev/null +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_cr.h" + +#include "orte/mca/rml/base/base.h" +#include "orte/mca/routed/base/base.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/grpcomm/base/base.h" +#include "orte/mca/iof/base/base.h" +#include "orte/mca/plm/base/base.h" +#include "orte/mca/odls/base/base.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/base.h" +#if OPAL_ENABLE_FT == 1 +#include "orte/mca/snapc/base/base.h" +#endif +#include "orte/mca/filem/base/base.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/util/session_dir.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_cr.h" +#include "orte/runtime/orte_wait.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/base/base.h" + +int orte_ess_base_orted_setup(void) +{ + int ret; + char *error = NULL; + char *jobid_str, *procid_str; + + /* if I am a daemon, I still need to open and select the + * the PLM so I can do local spawns, if permitted + */ + if (ORTE_SUCCESS != (ret = orte_plm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_select"; + goto error; + } + + /* Setup the communication infrastructure */ + + /* Runtime Messaging Layer */ + if (ORTE_SUCCESS != (ret = orte_rml_base_open())) { + 
ORTE_ERROR_LOG(ret); + error = "orte_rml_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_select"; + goto error; + } + /* Routed system */ + if (ORTE_SUCCESS != (ret = orte_routed_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_select"; + goto error; + } + /* + * Group communications + */ + if (ORTE_SUCCESS != (ret = orte_grpcomm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_select"; + goto error; + } + + /* Now provide a chance for the PLM + * to perform any module-specific init functions. This + * needs to occur AFTER the communications are setup + * as it may involve starting a non-blocking recv + */ + if (ORTE_SUCCESS != (ret = orte_plm.init())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_init"; + goto error; + } + + /* Open/select the odls */ + if (ORTE_SUCCESS != (ret = orte_odls_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_odls_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_odls_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_odls_base_select"; + goto error; + } + + /* enable communication with the rml */ + if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml.enable_comm"; + goto error; + } + + /* setup my session directory */ + if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobid_str, ORTE_PROC_MY_NAME->jobid))) { + ORTE_ERROR_LOG(ret); + error = "orte_convert_jobid_to_string"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_util_convert_vpid_to_string(&procid_str, ORTE_PROC_MY_NAME->vpid))) { + ORTE_ERROR_LOG(ret); + error = "orte_convert_vpid_to_string"; + goto 
error; + } + + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, + "%s setting up session dir with\n\ttmpdir: %s\n\tuser %s\n\thost %s\n\tjobid %s\n\tprocid %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, + orte_system_info.user, orte_system_info.nodename, jobid_str, procid_str)); + + if (ORTE_SUCCESS != (ret = orte_session_dir(true, + orte_process_info.tmpdir_base, + orte_system_info.user, + orte_system_info.nodename, NULL, + jobid_str, procid_str))) { + if (jobid_str != NULL) free(jobid_str); + if (procid_str != NULL) free(procid_str); + ORTE_ERROR_LOG(ret); + error = "orte_session_dir"; + goto error; + } + if (NULL != jobid_str) { + free(jobid_str); + } + if (NULL != procid_str) { + free(procid_str); + } + + /* setup the routed info - the selected routed component + * will know what to do. + */ + if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_routed.init_routes"; + goto error; + } + + /* + * setup I/O forwarding system - must come after we init routes */ + if (ORTE_SUCCESS != (ret = orte_iof_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_iof_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_select"; + goto error; + } + + /* setup the FileM */ + if (ORTE_SUCCESS != (ret = orte_filem_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_filem_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_filem_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_filem_base_select"; + goto error; + } + +#if OPAL_ENABLE_FT == 1 + /* + * Setup the SnapC + */ + if (ORTE_SUCCESS != (ret = orte_snapc_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + ORTE_ERROR_LOG(ret); + 
error = "orte_snapc_base_select"; + goto error; + } + + /* For daemons, ORTE doesn't need the OPAL CR stuff */ + opal_cr_set_enabled(false); +#else + opal_cr_set_enabled(false); +#endif + + /* + * Initalize the CR setup + * Note: Always do this, even in non-FT builds. + * If we don't some user level tools may hang. + */ + if (ORTE_SUCCESS != (ret = orte_cr_init())) { + ORTE_ERROR_LOG(ret); + error = "orte_cr_init"; + goto error; + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +int orte_ess_base_orted_finalize(void) +{ + orte_cr_finalize(); + +#if OPAL_ENABLE_FT == 1 + orte_snapc_base_close(); +#endif + orte_filem_base_close(); + + orte_odls_base_close(); + + orte_wait_finalize(); + orte_iof_base_close(); + + /* finalize selected modules so they can de-register + * any receives + */ + orte_plm_base_close(); + orte_errmgr_base_close(); + + /* now can close the rml and its friendly group comm */ + orte_grpcomm_base_close(); + orte_routed_base_close(); + orte_rml_base_close(); + + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c new file mode 100644 index 0000000000..96e02f90b3 --- /dev/null +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_cr.h" + +#include "orte/mca/rml/base/base.h" +#include "orte/mca/routed/base/base.h" +#include "orte/mca/errmgr/errmgr.h" +#if OPAL_ENABLE_FT == 1 +#include "orte/mca/snapc/base/base.h" +#endif +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/util/session_dir.h" +#include "orte/runtime/orte_cr.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/base/base.h" + +int orte_ess_base_tool_setup(void) +{ + int ret; + char *error = NULL; + + /* Setup the communication infrastructure */ + + /* Runtime Messaging Layer */ + if (ORTE_SUCCESS != (ret = orte_rml_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_select"; + goto error; + } + /* Routed system */ + if (ORTE_SUCCESS != (ret = orte_routed_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_select"; + goto error; + } + + /* since I am a tool, then all I really want to do is communicate. 
+ * So setup communications and be done - finding the HNP + * to which I want to communicate and setting up a route for + * that link is my responsibility + */ + + /* enable communication via the rml */ + if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml.enable_comm"; + goto error; + } + /* we -may- need to know the name of the head + * of our session directory tree, particularly the + * tmp base where any other session directories on + * this node might be located + */ + if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(NULL, + &orte_process_info.tmpdir_base, + &orte_process_info.top_session_dir, + orte_system_info.user, + orte_system_info.nodename, NULL, + NULL, NULL))) { + ORTE_ERROR_LOG(ret); + error = "define session dir names"; + goto error; + } + +#if OPAL_ENABLE_FT == 1 + /* + * Setup the SnapC + */ + if (ORTE_SUCCESS != (ret = orte_snapc_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_select"; + goto error; + } + + /* Tools do not need all the OPAL CR stuff */ + opal_cr_set_enabled(false); +#endif + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +int orte_ess_base_tool_finalize(void) +{ + /* if I am a tool, then all I will have done is + * a very small subset of orte_init - ensure that + * I only back those elements out + */ + orte_routed_base_close(); + orte_rml_base_close(); + + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} diff --git a/orte/mca/sds/base/help-sds-base.txt b/orte/mca/ess/base/help-ess-base.txt similarity index 98% rename from 
orte/mca/sds/base/help-sds-base.txt rename to orte/mca/ess/base/help-ess-base.txt index 6bc6a85ec2..5e315c74ec 100644 --- a/orte/mca/sds/base/help-sds-base.txt +++ b/orte/mca/ess/base/help-ess-base.txt @@ -18,7 +18,7 @@ # # This is the US/English general help file for the SDS base. # -[sds-base:execv-error] +[ess-base:execv-error] The singleton application was not able to find the executable "orted" in your PATH or in the directory where Open MPI/OpenRTE was initially installed, and therefore cannot continue. diff --git a/orte/mca/errmgr/hnp/Makefile.am b/orte/mca/ess/cnos/Makefile.am similarity index 74% rename from orte/mca/errmgr/hnp/Makefile.am rename to orte/mca/ess/cnos/Makefile.am index 4faeeb8a29..53badad39b 100644 --- a/orte/mca/errmgr/hnp/Makefile.am +++ b/orte/mca/ess/cnos/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - errmgr_hnp.h \ - errmgr_hnp_component.c \ - errmgr_hnp.c + ess_cnos.h \ + ess_cnos_component.c \ + ess_cnos_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_errmgr_hnp_DSO +if OMPI_BUILD_ess_cnos_DSO component_noinst = -component_install = mca_errmgr_hnp.la +component_install = mca_ess_cnos.la else -component_noinst = libmca_errmgr_hnp.la +component_noinst = libmca_ess_cnos.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_errmgr_hnp_la_SOURCES = $(sources) -mca_errmgr_hnp_la_LDFLAGS = -module -avoid-version +mca_ess_cnos_la_SOURCES = $(sources) +mca_ess_cnos_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_errmgr_hnp_la_SOURCES =$(sources) -libmca_errmgr_hnp_la_LDFLAGS = -module -avoid-version +libmca_ess_cnos_la_SOURCES =$(sources) +libmca_ess_cnos_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sds/cnos/configure.m4 b/orte/mca/ess/cnos/configure.m4 similarity index 67% rename from orte/mca/sds/cnos/configure.m4 rename to orte/mca/ess/cnos/configure.m4 index c3216b9d71..b3db391163 100644 --- a/orte/mca/sds/cnos/configure.m4 +++ b/orte/mca/ess/cnos/configure.m4 @@ -17,9 +17,14 @@ # $HEADER$ # -# MCA_sds_cnos_CONFIG([action-if-found], [action-if-not-found]) +# MCA_ess_cnos_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_sds_cnos_CONFIG],[ +AC_DEFUN([MCA_ess_cnos_CONFIG],[ # check for cnos functions - AC_CHECK_FUNC([cnos_get_rank], [$1], [$2]) + # a bit of a hack,,, we don't want ess_cnos if alps + # was requested, and we can't rely on build priority because + # ess_alps uses priorty 10 so that ess_hnp is built as well. 
+ AC_CHECK_FUNC([cnos_get_rank], + [OMPI_CHECK_ALPS([ess_cnos], [$2], [$1])], + [$2]) ])dnl diff --git a/orte/mca/ras/dash_host/configure.params b/orte/mca/ess/cnos/configure.params similarity index 88% rename from orte/mca/ras/dash_host/configure.params rename to orte/mca/ess/cnos/configure.params index e959acf694..4e4404321c 100644 --- a/orte/mca/ras/dash_host/configure.params +++ b/orte/mca/ess/cnos/configure.params @@ -19,7 +19,10 @@ # $HEADER$ # -# Specific to this module - - PARAM_CONFIG_FILES="Makefile" + +# +# Set the config priority so that, if we can build, +# only CNOS component(s) will build + +PARAM_CONFIG_PRIORITY=30 diff --git a/orte/util/universe_setup_file_io.h b/orte/mca/ess/cnos/ess_cnos.h similarity index 66% rename from orte/util/universe_setup_file_io.h rename to orte/mca/ess/cnos/ess_cnos.h index 40f61817e3..89dae1240f 100644 --- a/orte/util/universe_setup_file_io.h +++ b/orte/mca/ess/cnos/ess_cnos.h @@ -1,5 +1,4 @@ /* - * * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
@@ -15,19 +14,21 @@ * Additional copyrights may follow * * $HEADER$ - * - * $Id: ompi_universe_setup_file I/O functions $ - * */ -#ifndef ORTE_UNIV_SETUP_FILE_IO_H -#define ORTE_UNIV_SETUP_FILE_IO_H +#ifndef ORTE_ESS_CNOS_H +#define ORTE_ESS_CNOS_H -#include "orte_config.h" -#include "orte/util/univ_info.h" +BEGIN_C_DECLS -ORTE_DECLSPEC int orte_write_universe_setup_file(char *filename, orte_universe_t *info); +/* + * Module open / close + */ +int orte_ess_cnos_component_open(void); +int orte_ess_cnos_component_close(void); +orte_ess_base_module_t* orte_ess_cnos_component_init(int *priority); -ORTE_DECLSPEC int orte_read_universe_setup_file(char *filename, orte_universe_t *info); -#endif +END_C_DECLS + +#endif /* ORTE_ESS_CNOS_H */ diff --git a/orte/mca/sds/cnos/sds_cnos_component.c b/orte/mca/ess/cnos/ess_cnos_component.c similarity index 63% rename from orte/mca/sds/cnos/sds_cnos_component.c rename to orte/mca/ess/cnos/ess_cnos_component.c index f54d4eab3f..c223c579fd 100644 --- a/orte/mca/sds/cnos/sds_cnos_component.c +++ b/orte/mca/ess/cnos/ess_cnos_component.c @@ -9,7 +9,6 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2008 UT-Battelle, LLC * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,25 +23,24 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/cnos/sds_cnos.h" -#include "opal/mca/base/mca_base_param.h" +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/cnos/ess_cnos.h" -extern orte_sds_base_module_t orte_sds_cnos_module; +extern orte_ess_base_module_t orte_ess_cnos_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_cnos_component = { +orte_ess_base_component_t mca_ess_cnos_component = { /* First, the mca_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ "cnos", @@ -51,8 +49,8 @@ orte_sds_base_component_t mca_sds_cnos_component = { ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_cnos_component_open, - orte_sds_cnos_component_close + orte_ess_cnos_component_open, + orte_ess_cnos_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -62,41 +60,33 @@ orte_sds_base_component_t mca_sds_cnos_component = { }, /* Initialization / querying functions */ - orte_sds_cnos_component_init + orte_ess_cnos_component_init }; int -orte_sds_cnos_component_open(void) +orte_ess_cnos_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_cnos_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_cnos_component_init(int *priority) { - int id; - char *mode; - - /* okay, not seed/singleton attempt another approach */ - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - 
mca_base_param_lookup_string(id, &mode); - - /* if mode isn't NULL, then we have an ORTE starter. Don't use - this component */ - if (NULL != mode) { - free(mode); - return NULL; - } - - *priority = 30; - return &orte_sds_cnos_module; + /* since we are not launched by an ORTE launcher, + * we want to be selected ahead of the singleton + * component if we detect our supported environment. + * So ensure that our priority is higher than + * the singleton's + */ + *priority = 60; + return &orte_ess_cnos_module; } int -orte_sds_cnos_component_close(void) +orte_ess_cnos_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/cnos/ess_cnos_module.c b/orte/mca/ess/cnos/ess_cnos_module.c new file mode 100644 index 0000000000..e68e6f986e --- /dev/null +++ b/orte/mca/ess/cnos/ess_cnos_module.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "opal/util/output.h" + +#include "orte/mca/errmgr/base/base.h" +#include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/runtime/orte_globals.h" +#include "orte/mca/grpcomm/base/base.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/cnos/ess_cnos.h" + +static int rte_init(char flags); +static int rte_finalize(void); +static void rte_abort(int status, bool report) __opal_attribute_noreturn__; + +orte_ess_base_module_t orte_ess_cnos_module = { + rte_init, + rte_finalize, + rte_abort +}; + +static int rte_init(char flags) +{ + int rc; + + /* Get our process information */ + + /* Procs in this environment are directly launched. Hence, there + * was no mpirun to create a jobid for us, and each app proc is + * going to have to fend for itself. For now, we assume that the + * jobid is some arbitrary number (say, 1). 
+ */ + ORTE_PROC_MY_NAME->jobid = 1; + + /* find our vpid from cnos */ + ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank(); + + /* Get the number of procs in the job from cnos */ + orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size(); + + /* MPI_Init needs the grpcomm framework, so we have to init it */ + if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_grpcomm_base_select())) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* that's all we need here */ + return ORTE_SUCCESS; +} + + +static int rte_finalize(void) +{ + /* just cleanup the things we used */ + orte_grpcomm_base_close(); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} + +static void rte_abort(int status, bool report) +{ + exit(status); +} diff --git a/orte/mca/gpr/configure.m4 b/orte/mca/ess/configure.m4 similarity index 82% rename from orte/mca/gpr/configure.m4 rename to orte/mca/ess/configure.m4 index 9d8f01f55a..d23bff8708 100644 --- a/orte/mca/gpr/configure.m4 +++ b/orte/mca/ess/configure.m4 @@ -10,4 +10,4 @@ dnl $HEADER$ dnl dnl we only want same priority level components -m4_define(MCA_gpr_CONFIGURE_MODE, STOP_AT_FIRST_PRIORITY) +m4_define(MCA_ess_CONFIGURE_MODE, STOP_AT_FIRST_PRIORITY) diff --git a/orte/mca/pls/poe/Makefile.am b/orte/mca/ess/env/Makefile.am similarity index 75% rename from orte/mca/pls/poe/Makefile.am rename to orte/mca/ess/env/Makefile.am index ce436bd911..73491cad02 100644 --- a/orte/mca/pls/poe/Makefile.am +++ b/orte/mca/ess/env/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - pls_poe.h \ - pls_poe_component.c \ - pls_poe_module.c + ess_env.h \ + ess_env_component.c \ + ess_env_module.c # Make the output library in this directory, and name it either # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la # (for static builds). 
-if OMPI_BUILD_pls_poe_DSO +if OMPI_BUILD_ess_env_DSO component_noinst = -component_install = mca_pls_poe.la +component_install = mca_ess_env.la else -component_noinst = libmca_pls_poe.la +component_noinst = libmca_ess_env.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_pls_poe_la_SOURCES = $(sources) -mca_pls_poe_la_LDFLAGS = -module -avoid-version +mca_ess_env_la_SOURCES = $(sources) +mca_ess_env_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_pls_poe_la_SOURCES =$(sources) -libmca_pls_poe_la_LDFLAGS = -module -avoid-version +libmca_ess_env_la_SOURCES =$(sources) +libmca_ess_env_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/ess/env/configure.m4 b/orte/mca/ess/env/configure.m4 new file mode 100644 index 0000000000..99fef655c0 --- /dev/null +++ b/orte/mca/ess/env/configure.m4 @@ -0,0 +1,13 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Sandia National Laboratories. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ess_env_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_ess_env_CONFIG], [$1]) diff --git a/orte/mca/gpr/proxy/configure.params b/orte/mca/ess/env/configure.params similarity index 91% rename from orte/mca/gpr/proxy/configure.params rename to orte/mca/ess/env/configure.params index 0d1335d6d3..c410dd1eaf 100644 --- a/orte/mca/gpr/proxy/configure.params +++ b/orte/mca/ess/env/configure.params @@ -19,7 +19,9 @@ # $HEADER$ # -# Specific to this module - PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that we build +# whenever someone using daemons does + PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/ess/env/ess_env.h b/orte/mca/ess/env/ess_env.h new file mode 100644 index 0000000000..15fe9d73ef --- /dev/null +++ b/orte/mca/ess/env/ess_env.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_ESS_ENV_H +#define ORTE_ESS_ENV_H + +BEGIN_C_DECLS + +/* + * Module open / close + */ +int orte_ess_env_component_open(void); +int orte_ess_env_component_close(void); +orte_ess_base_module_t* orte_ess_env_component_init(int *priority); + + +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_env_component; + +END_C_DECLS + +#endif /* ORTE_ESS_ENV_H */ diff --git a/orte/mca/sds/slurm/sds_slurm_component.c b/orte/mca/ess/env/ess_env_component.c similarity index 58% rename from orte/mca/sds/slurm/sds_slurm_component.c rename to orte/mca/ess/env/ess_env_component.c index 4851cb35a8..901fb6d553 100644 --- a/orte/mca/sds/slurm/sds_slurm_component.c +++ b/orte/mca/ess/env/ess_env_component.c @@ -23,33 +23,36 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/slurm/sds_slurm.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_slurm_module; +#include "orte/util/proc_info.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/env/ess_env.h" + +extern orte_ess_base_module_t orte_ess_env_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_slurm_component = { +orte_ess_base_component_t mca_ess_env_component = { { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ - "slurm", + "env", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_slurm_component_open, - orte_sds_slurm_component_close + orte_ess_env_component_open, + orte_ess_env_component_close }, /* Next the MCA v1.0.0 
component meta data */ @@ -59,49 +62,46 @@ orte_sds_base_component_t mca_sds_slurm_component = { }, /* Initialization / querying functions */ - orte_sds_slurm_component_init + orte_ess_env_component_init }; int -orte_sds_slurm_component_open(void) +orte_ess_env_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_slurm_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_env_component_init(int *priority) { - int id; - char *slurm_nodeid, *mode; - /* okay, not seed/singleton - see if we're running under slurm */ - slurm_nodeid = getenv("SLURM_NODEID"); - if (NULL == slurm_nodeid) { - return NULL; + /* we are the env module, so set the priority to + * be higher than the tool component so that a + * tool launched as a distributed set of procs + * (i.e., a "tool with name") will select this + * module, but low enough that any other environment + * will override us + */ + + /* if we don't have a path back to the HNP, then we + * were not launched by mpirun, so don't pick us as + * it would be impossible for the correct env vars + * to have been set! 
+ */ + if (NULL != orte_process_info.my_hnp_uri) { + *priority = 20; + return &orte_ess_env_module; } - - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - mca_base_param_lookup_string(id, &mode); - - if (NULL == mode || 0 != strcmp("slurm", mode)) { - if(NULL != mode) { - free(mode); - } - return NULL; - } - - if(NULL != mode) { - free(mode); - } - *priority = 20; - return &orte_sds_slurm_module; + + /* if not, then return NULL - we cannot be selected */ + return NULL; } int -orte_sds_slurm_component_close(void) +orte_ess_env_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c new file mode 100644 index 0000000000..cd444306d2 --- /dev/null +++ b/orte/mca/ess/env/ess_env_module.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/threads/mutex.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_cr.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/util/os_path.h" +#include "opal/util/cmd_line.h" +#include "opal/util/malloc.h" + +#include "orte/mca/rml/base/base.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/routed/base/base.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/grpcomm/base/base.h" +#include "orte/mca/iof/base/base.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/ess.h" +#include "orte/mca/ras/base/base.h" +#include "orte/mca/plm/base/base.h" +#include "orte/mca/odls/base/base.h" + +#include "orte/mca/rmaps/base/base.h" +#if OPAL_ENABLE_FT == 1 +#include "orte/mca/snapc/base/base.h" +#endif +#include "orte/mca/filem/base/base.h" +#include "orte/util/proc_info.h" +#include "orte/util/session_dir.h" +#include "orte/util/sys_info.h" +#include "orte/util/hnp_contact.h" +#include "orte/util/name_fns.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_wait.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/runtime/orte_cr.h" +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/env/ess_env.h" + +static int env_set_name(void); + +static int rte_init(char flags); +static int rte_finalize(void); + + +orte_ess_base_module_t orte_ess_env_module = { + rte_init, + rte_finalize, + orte_ess_base_app_abort +}; + + +static int rte_init(char flags) +{ + int ret; + char *error = NULL; + + /* Start by getting a unique 
name from the enviro */ + env_set_name(); + + /* if I am a daemon, complete my setup using the + * default procedure + */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_orted_setup"; + goto error; + } + + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_tool_setup"; + goto error; + } + + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_app_setup"; + goto error; + } + + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +static int rte_finalize(void) +{ + int ret; + + /* if I am a daemon, finalize using the default procedure */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { + ORTE_ERROR_LOG(ret); + } + } + + return ret; +} + +static int env_set_name(void) +{ + char *jobid_str, *procid_str; + int id, rc; + orte_jobid_t jobid; + orte_vpid_t vpid; + + id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL); + mca_base_param_lookup_string(id, &jobid_str); + if (NULL == jobid_str) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = 
orte_util_convert_string_to_jobid(&jobid, jobid_str))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + free(jobid_str); + + id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL); + mca_base_param_lookup_string(id, &procid_str); + if (NULL == procid_str) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, procid_str))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + free(procid_str); + + ORTE_PROC_MY_NAME->jobid = jobid; + ORTE_PROC_MY_NAME->vpid = vpid; + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, + "ess:env set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* get the non-name common environmental variables */ + if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { + ORTE_ERROR_LOG(rc); + return rc; + } + + return ORTE_SUCCESS; +} + diff --git a/orte/mca/ess/ess.h b/orte/mca/ess/ess.h new file mode 100644 index 0000000000..7522020163 --- /dev/null +++ b/orte/mca/ess/ess.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + * The OpenRTE Environment-Specific Services + * + */ + +#ifndef ORTE_ESS_H +#define ORTE_ESS_H + +#include "opal/mca/mca.h" + +BEGIN_C_DECLS + +/* + * Module and component structures + */ +struct orte_ess_base_module_1_0_0_t; +typedef struct orte_ess_base_module_1_0_0_t orte_ess_base_module_1_0_0_t; +typedef orte_ess_base_module_1_0_0_t orte_ess_base_module_t; + +struct orte_ess_base_component_1_0_0_t; +typedef struct orte_ess_base_component_1_0_0_t orte_ess_base_component_1_0_0_t; +typedef orte_ess_base_component_1_0_0_t orte_ess_base_component_t; + +/** + * Selection function + */ +typedef orte_ess_base_module_t* +(*orte_ess_base_component_init_fn_t)(int *priority); + +/* + * API functions + */ + +/* + * Initialize the RTE for this environment + */ +typedef int (*orte_ess_base_module_init_fn_t)(char flags); + +/* + * Finalize the RTE for this environment + */ +typedef int (*orte_ess_base_module_finalize_fn_t)(void); + +/** + * Abort the current application + * + * Aborts currently running application, NOTE: We do NOT call the + * regular C-library "abort" function, even + * though that would have alerted us to the fact that this is + * an abnormal termination, because it would automatically cause + * a core file to be generated. The "report" flag indicates if the + * function should create an appropriate file to alert the local + * orted that termination was abnormal. 
+ */ +typedef void (*orte_ess_base_module_abort_fn_t)(int status, bool report); + + +/* + * the standard module data structure + */ +struct orte_ess_base_module_1_0_0_t { + orte_ess_base_module_init_fn_t init; + orte_ess_base_module_finalize_fn_t finalize; + orte_ess_base_module_abort_fn_t abort; +}; + + +/* + * the standard component data structure + */ +struct orte_ess_base_component_1_0_0_t { + mca_base_component_t ess_version; + mca_base_component_data_1_0_0_t ess_data; + orte_ess_base_component_init_fn_t ess_init; +}; + +/* + * Macro for use in components that are of type ess v1.0.0 + */ +#define ORTE_ESS_BASE_VERSION_1_0_0 \ + /* ess v1.0 is chained to MCA v1.0 */ \ + MCA_BASE_VERSION_1_0_0, \ + /* ess v1.0 */ \ + "ess", 1, 0, 0 + +/* Global structure for accessing ESS functions */ +ORTE_DECLSPEC extern orte_ess_base_module_t orte_ess; /* holds selected module's function pointers */ + +END_C_DECLS + +#endif diff --git a/orte/mca/sds/env/Makefile.am b/orte/mca/ess/hnp/Makefile.am similarity index 75% rename from orte/mca/sds/env/Makefile.am rename to orte/mca/ess/hnp/Makefile.am index c3ec6879f1..4cf3a2c567 100644 --- a/orte/mca/sds/env/Makefile.am +++ b/orte/mca/ess/hnp/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - sds_env.h \ - sds_env_component.c \ - sds_env_module.c + ess_hnp.h \ + ess_hnp_component.c \ + ess_hnp_module.c # Make the output library in this directory, and name it either # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la # (for static builds). 
-if OMPI_BUILD_sds_env_DSO +if OMPI_BUILD_ess_hnp_DSO component_noinst = -component_install = mca_sds_env.la +component_install = mca_ess_hnp.la else -component_noinst = libmca_sds_env.la +component_noinst = libmca_ess_hnp.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sds_env_la_SOURCES = $(sources) -mca_sds_env_la_LDFLAGS = -module -avoid-version +mca_ess_hnp_la_SOURCES = $(sources) +mca_ess_hnp_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_sds_env_la_SOURCES =$(sources) -libmca_sds_env_la_LDFLAGS = -module -avoid-version +libmca_ess_hnp_la_SOURCES =$(sources) +libmca_ess_hnp_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/ess/hnp/configure.m4 b/orte/mca/ess/hnp/configure.m4 new file mode 100644 index 0000000000..56c49a21e1 --- /dev/null +++ b/orte/mca/ess/hnp/configure.m4 @@ -0,0 +1,13 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Sandia National Laboratories. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ess_hnp_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_ess_hnp_CONFIG], [$1]) diff --git a/orte/mca/ns/proxy/configure.params b/orte/mca/ess/hnp/configure.params similarity index 89% rename from orte/mca/ns/proxy/configure.params rename to orte/mca/ess/hnp/configure.params index 3513f8d956..c410dd1eaf 100644 --- a/orte/mca/ns/proxy/configure.params +++ b/orte/mca/ess/hnp/configure.params @@ -19,6 +19,9 @@ # $HEADER$ # -# Specific to this module - PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that we build +# whenever someone using daemons does + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/ess/hnp/ess_hnp.h b/orte/mca/ess/hnp/ess_hnp.h new file mode 100644 index 0000000000..e086fe7b78 --- /dev/null +++ b/orte/mca/ess/hnp/ess_hnp.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_ESS_HNP_H +#define ORTE_ESS_HNP_H + +BEGIN_C_DECLS + +/* + * Module open / close + */ +int orte_ess_hnp_component_open(void); +int orte_ess_hnp_component_close(void); +orte_ess_base_module_t* orte_ess_hnp_component_init(int *priority); + + +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_hnp_component; + +END_C_DECLS + +#endif /* ORTE_ESS_HNP_H */ diff --git a/orte/mca/sds/seed/sds_seed_component.c b/orte/mca/ess/hnp/ess_hnp_component.c similarity index 68% rename from orte/mca/sds/seed/sds_seed_component.c rename to orte/mca/ess/hnp/ess_hnp_component.c index 950a268411..c009323afe 100644 --- a/orte/mca/sds/seed/sds_seed_component.c +++ b/orte/mca/ess/hnp/ess_hnp_component.c @@ -23,36 +23,36 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/seed/sds_seed.h" #include "opal/mca/base/mca_base_param.h" + #include "orte/util/proc_info.h" -extern orte_sds_base_module_t orte_sds_seed_module; +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/hnp/ess_hnp.h" + +extern orte_ess_base_module_t orte_ess_hnp_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_seed_component = { - /* First, the mca_component_t struct containing meta information - about the component itself */ +orte_ess_base_component_t mca_ess_hnp_component = { { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ - "seed", + "hnp", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_seed_component_open, - orte_sds_seed_component_close + 
orte_ess_hnp_component_open, + orte_ess_hnp_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -62,29 +62,37 @@ orte_sds_base_component_t mca_sds_seed_component = { }, /* Initialization / querying functions */ - orte_sds_seed_component_init + orte_ess_hnp_component_init }; int -orte_sds_seed_component_open(void) +orte_ess_hnp_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_seed_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_hnp_component_init(int *priority) { - if (orte_process_info.seed == false) return NULL; - *priority = 40; - return &orte_sds_seed_module; + /* we are the hnp module - we need to be selected + * IFF we are designated as the hnp + */ + if (orte_process_info.hnp) { + *priority = 100; + return &orte_ess_hnp_module; + } + + /* else, we are not */ + *priority = -1; + return NULL; } int -orte_sds_seed_component_close(void) +orte_ess_hnp_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c new file mode 100644 index 0000000000..2b236bfbfa --- /dev/null +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_cr.h" + +#include "opal/util/os_path.h" +#include "opal/util/malloc.h" + +#include "orte/mca/rml/base/base.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/routed/base/base.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/grpcomm/base/base.h" +#include "orte/mca/iof/base/base.h" +#include "orte/mca/ras/base/base.h" +#include "orte/mca/plm/base/base.h" +#include "orte/mca/odls/base/base.h" + +#include "orte/mca/rmaps/base/base.h" +#if OPAL_ENABLE_FT == 1 +#include "orte/mca/snapc/base/base.h" +#endif +#include "orte/mca/filem/base/base.h" +#include "orte/util/proc_info.h" +#include "orte/util/session_dir.h" +#include "orte/util/sys_info.h" +#include "orte/util/hnp_contact.h" +#include "orte/util/name_fns.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_wait.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/runtime/orte_cr.h" +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/hnp/ess_hnp.h" + +static int rte_init(char flags); +static int rte_finalize(void); +static void rte_abort(int status, bool report) __opal_attribute_noreturn__; + + +orte_ess_base_module_t orte_ess_hnp_module = { + rte_init, + rte_finalize, + rte_abort +}; + + +/* + * Bring up the HNP: open and select the PLM, RML, routed, grpcomm, RAS, + * RMAPS, errmgr and ODLS frameworks, create the session directory and + * the contact file, record this process in the job/node/proc tables, + * then open IOF, FileM (and SnapC in FT builds) and the CR support. + * + * The flags argument is not used by this module. + * + * Returns ORTE_SUCCESS, or the error code of the first call that failed + * after reporting the name of that call through opal_show_help(). + */ +static int rte_init(char flags) +{ + int ret; + char *error = NULL; + char *jobid_str, *procid_str, *contact_path; + orte_job_t *jdata; + orte_node_t *node; + orte_proc_t *proc; + orte_std_cntr_t index; + + /* Since we are the HNP, then responsibility for + * defining the name falls to the PLM component for 
our + * respective environment - hence, we have to open the PLM + * first and select that component. Note that ONLY the + * HNP ever uses a PLM component anyway + */ + if (ORTE_SUCCESS != (ret = orte_plm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_select"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_set_hnp_name"; + goto error; + } + + /* Setup the communication infrastructure */ + /* + * Runtime Messaging Layer + */ + if (ORTE_SUCCESS != (ret = orte_rml_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_base_select"; + goto error; + } + /* + * Routed system + */ + if (ORTE_SUCCESS != (ret = orte_routed_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_routed_base_select"; + goto error; + } + /* + * Group communications + */ + if (ORTE_SUCCESS != (ret = orte_grpcomm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_grpcomm_base_select"; + goto error; + } + + /* Now provide a chance for the PLM + * to perform any module-specific init functions. 
This + * needs to occur AFTER the communications are setup + * as it may involve starting a non-blocking recv + */ + if (ORTE_SUCCESS != (ret = orte_plm.init())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_init"; + goto error; + } + + /* + * Setup the remaining resource + * management and errmgr frameworks - application procs + * and daemons do not open these frameworks as they only use + * the hnp proxy support in the PLM framework. + */ + if (ORTE_SUCCESS != (ret = orte_ras_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_ras_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_ras_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_ras_base_select"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_rmaps_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_rmaps_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_rmaps_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_rmaps_base_select"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_errmgr_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_errmgr_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_errmgr_base_select"; + goto error; + } + + /* Open/select the odls */ + if (ORTE_SUCCESS != (ret = orte_odls_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_odls_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_odls_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_odls_base_select"; + goto error; + } + + /* enable communication with the rml */ + if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { + ORTE_ERROR_LOG(ret); + error = "orte_rml.enable_comm"; + goto error; + } + + /* setup my session directory */ + if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobid_str, ORTE_PROC_MY_NAME->jobid))) { + ORTE_ERROR_LOG(ret); + error = "orte_convert_jobid_to_string"; + goto error; + } + if (ORTE_SUCCESS != (ret = 
orte_util_convert_vpid_to_string(&procid_str, ORTE_PROC_MY_NAME->vpid))) { + /* the jobid string was already produced above - do not leak it */ + free(jobid_str); + ORTE_ERROR_LOG(ret); + error = "orte_convert_vpid_to_string"; + goto error; + } + + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, + "%s setting up session dir with\n\ttmpdir: %s\n\tuser %s\n\thost %s\n\tjobid %s\n\tprocid %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, + orte_system_info.user, orte_system_info.nodename, jobid_str, procid_str)); + + if (ORTE_SUCCESS != (ret = orte_session_dir(true, + orte_process_info.tmpdir_base, + orte_system_info.user, + orte_system_info.nodename, NULL, + jobid_str, procid_str))) { + if (jobid_str != NULL) free(jobid_str); + if (procid_str != NULL) free(procid_str); + ORTE_ERROR_LOG(ret); + error = "orte_session_dir"; + goto error; + } + if (NULL != jobid_str) { + free(jobid_str); + } + if (NULL != procid_str) { + free(procid_str); + } + + /* Once the session directory location has been established, set + the opal_output hnp file location to be in the + proc-specific session directory. 
*/ + opal_output_set_output_file_info(orte_process_info.proc_session_dir, + "output-", NULL, NULL); + + /* save my contact info in a file for others to find */ + contact_path = opal_os_path(false, orte_process_info.job_session_dir, + "contact.txt", NULL); + + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, + "%s writing contact file %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + contact_path)); + + if (ORTE_SUCCESS != (ret = orte_write_hnp_contact_file(contact_path))) { + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, + "%s writing contact file failed with error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_ERROR_NAME(ret))); + } else { + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, + "%s wrote contact file", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + } + free(contact_path); + + /* Setup the job data object for the daemons */ + /* create and store the job data object */ + jdata = OBJ_NEW(orte_job_t); + jdata->jobid = ORTE_PROC_MY_NAME->jobid; + orte_pointer_array_add(&index, orte_job_data, jdata); + + /* create and store a node object where we are */ + node = OBJ_NEW(orte_node_t); + node->name = strdup(orte_system_info.nodename); + orte_pointer_array_add(&node->index, orte_node_pool, node); + + /* create and store a proc object for us */ + proc = OBJ_NEW(orte_proc_t); + proc->name.jobid = ORTE_PROC_MY_NAME->jobid; + proc->name.vpid = ORTE_PROC_MY_NAME->vpid; + proc->pid = orte_process_info.pid; + proc->rml_uri = orte_rml.get_contact_info(); + proc->state = ORTE_PROC_STATE_RUNNING; + OBJ_RETAIN(node); /* keep accounting straight */ + proc->node = node; + orte_pointer_array_add(&index, jdata->procs, proc); + + /* record that the daemon (i.e., us) is on this node + * NOTE: we do not add the proc object to the node's + * proc array because we are not an application proc. 
+ * Instead, we record it in the daemon field of the + * node object + */ + OBJ_RETAIN(proc); /* keep accounting straight */ + node->daemon = proc; + node->daemon_launched = true; + node->state = ORTE_NODE_STATE_UP; + + /* record that the daemon job is running */ + jdata->num_procs = 1; + jdata->state = ORTE_JOB_STATE_RUNNING; + + /* setup the routed info - the selected routed component + * will know what to do. + */ + if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_routed.init_routes"; + goto error; + } + + /* + * setup I/O forwarding system - must come after we init routes */ + if (ORTE_SUCCESS != (ret = orte_iof_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_iof_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_iof_base_select"; + goto error; + } + + /* setup the FileM */ + if (ORTE_SUCCESS != (ret = orte_filem_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_filem_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_filem_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_filem_base_select"; + goto error; + } + +#if OPAL_ENABLE_FT == 1 + /* + * Setup the SnapC + */ + if (ORTE_SUCCESS != (ret = orte_snapc_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + ORTE_ERROR_LOG(ret); + error = "orte_snapc_base_select"; + goto error; + } + + /* For HNP, ORTE doesn't need the OPAL CR stuff */ + opal_cr_set_enabled(false); +#else + opal_cr_set_enabled(false); +#endif + + /* + * Initialize the CR setup + * Note: Always do this, even in non-FT builds. + * If we don't some user level tools may hang. 
+ */ + if (ORTE_SUCCESS != (ret = orte_cr_init())) { + ORTE_ERROR_LOG(ret); + error = "orte_cr_init"; + goto error; + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +/* + * Tear down the HNP: remove the contact file written by rte_init, close + * the frameworks, finalize the session directory and release the global + * system/process info. Always returns ORTE_SUCCESS. + */ +static int rte_finalize(void) +{ + char *contact_path; + + /* remove my contact info file - rte_init wrote it into the job + * session directory, so that is where it has to be unlinked + */ + contact_path = opal_os_path(false, orte_process_info.job_session_dir, + "contact.txt", NULL); + unlink(contact_path); + free(contact_path); + + orte_cr_finalize(); + +#if OPAL_ENABLE_FT == 1 + orte_snapc_base_close(); +#endif + orte_filem_base_close(); + + orte_odls_base_close(); + + orte_wait_finalize(); + orte_iof_base_close(); + + /* finalize selected modules so they can de-register + * any receives + */ + orte_ras_base_close(); + orte_rmaps_base_close(); + orte_plm_base_close(); + orte_errmgr_base_close(); + + /* now can close the rml and its friendly group comm */ + orte_grpcomm_base_close(); + orte_routed_base_close(); + orte_rml_base_close(); + + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} + +/* + * For application procs, we do NOT call the regular + * C-library "abort" function, even though that would have + * alerted us to the fact that this is an abnormal termination, + * because it would automatically cause a core file to be + * generated. On large systems, that can be overwhelming + * (imagine a few thousand Gbyte-sized files hitting + * a shared file system simultaneously...ouch!). 
+ * + * However, the HNP is only ONE process, so we can do it + * here as the core file might prove useful. + * + * Neither status nor report is consulted; the process always + * ends through abort(). + */ +static void rte_abort(int status, bool report) +{ + /* do NOT do a normal finalize as this will very likely + * hang the process. 
We are aborting due to an abnormal condition + * that precludes normal cleanup + * + * We do need to do the following bits to make sure we leave a + * clean environment. Taken from orte_finalize(): + * - Assume errmgr cleans up child processes before we exit. + */ + + /* CRS cleanup since it may have a named pipe and thread active */ + orte_cr_finalize(); + + /* - Clean out the global structures + * (not really necessary, but good practice) + */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + /* Now abort */ + abort(); +} + diff --git a/orte/mca/sds/lsf/Makefile.am b/orte/mca/ess/lsf/Makefile.am similarity index 71% rename from orte/mca/sds/lsf/Makefile.am rename to orte/mca/ess/lsf/Makefile.am index faba825c09..289fedf4d3 100644 --- a/orte/mca/sds/lsf/Makefile.am +++ b/orte/mca/ess/lsf/Makefile.am @@ -20,29 +20,29 @@ AM_CPPFLAGS = $(pls_lsf_CPPFLAGS) sources = \ - sds_lsf.h \ - sds_lsf_component.c \ - sds_lsf_module.c + ess_lsf.h \ + ess_lsf_component.c \ + ess_lsf_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_sds_lsf_DSO +if OMPI_BUILD_ess_lsf_DSO component_noinst = -component_install = mca_sds_lsf.la +component_install = mca_ess_lsf.la else -component_noinst = libmca_sds_lsf.la +component_noinst = libmca_ess_lsf.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sds_lsf_la_SOURCES = $(sources) -mca_sds_lsf_la_LDFLAGS = -module -avoid-version $(sds_lsf_LDFLAGS) -mca_sds_lsf_la_LIBADD = $(sds_lsf_LIBS) +mca_ess_lsf_la_SOURCES = $(sources) +mca_ess_lsf_la_LDFLAGS = -module -avoid-version $(ess_lsf_LDFLAGS) +mca_ess_lsf_la_LIBADD = $(ess_lsf_LIBS) noinst_LTLIBRARIES = $(component_noinst) -libmca_sds_lsf_la_SOURCES =$(sources) -libmca_sds_lsf_la_LDFLAGS = -module -avoid-version $(sds_lsf_LDFLAGS) -libmca_sds_lsf_la_LIBADD = $(sds_lsf_LIBS) +libmca_ess_lsf_la_SOURCES =$(sources) +libmca_ess_lsf_la_LDFLAGS = -module -avoid-version $(ess_lsf_LDFLAGS) +libmca_ess_lsf_la_LIBADD = $(ess_lsf_LIBS) diff --git a/orte/mca/sds/lsf/configure.m4 b/orte/mca/ess/lsf/configure.m4 similarity index 71% rename from orte/mca/sds/lsf/configure.m4 rename to orte/mca/ess/lsf/configure.m4 index 9f432c93ab..48bcae15a0 100644 --- a/orte/mca/sds/lsf/configure.m4 +++ b/orte/mca/ess/lsf/configure.m4 @@ -18,21 +18,21 @@ # $HEADER$ # -# MCA_sds_lsf_CONFIG([action-if-found], [action-if-not-found]) +# MCA_ess_lsf_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_sds_lsf_CONFIG],[ - OMPI_CHECK_LSF([sds_lsf], [sds_lsf_good=1], [sds_lsf_good=0]) +AC_DEFUN([MCA_ess_lsf_CONFIG],[ + OMPI_CHECK_LSF([ess_lsf], [ess_lsf_good=1], [ess_lsf_good=0]) # if check worked, set wrapper flags if so. 
# Evaluate succeed / fail - AS_IF([test "$sds_lsf_good" = "1"], - [sds_lsf_WRAPPER_EXTRA_LDFLAGS="$sds_lsf_LDFLAGS" - sds_lsf_WRAPPER_EXTRA_LIBS="$sds_lsf_LIBS" + AS_IF([test "$ess_lsf_good" = "1"], + [ess_lsf_WRAPPER_EXTRA_LDFLAGS="$ess_lsf_LDFLAGS" + ess_lsf_WRAPPER_EXTRA_LIBS="$ess_lsf_LIBS" $1], [$2]) # set build flags to use in makefile - AC_SUBST([sds_lsf_CPPFLAGS]) - AC_SUBST([sds_lsf_LDFLAGS]) - AC_SUBST([sds_lsf_LIBS]) + AC_SUBST([ess_lsf_CPPFLAGS]) + AC_SUBST([ess_lsf_LDFLAGS]) + AC_SUBST([ess_lsf_LIBS]) ])dnl diff --git a/orte/mca/ess/lsf/configure.params b/orte/mca/ess/lsf/configure.params new file mode 100644 index 0000000000..0727eefc3f --- /dev/null +++ b/orte/mca/ess/lsf/configure.params @@ -0,0 +1,27 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that, if we can build, +# all the LSF and supporting components will build + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/sds/lsf/sds_lsf.h b/orte/mca/ess/lsf/ess_lsf.h similarity index 68% rename from orte/mca/sds/lsf/sds_lsf.h rename to orte/mca/ess/lsf/ess_lsf.h index b680f71580..e660adb0d5 100644 --- a/orte/mca/sds/lsf/sds_lsf.h +++ b/orte/mca/ess/lsf/ess_lsf.h @@ -17,30 +17,20 @@ * $HEADER$ */ -#ifndef ORTE_SDS_LSF_H -#define ORTE_SDS_LSF_H +#ifndef ORTE_ESS_LSF_H +#define ORTE_ESS_LSF_H BEGIN_C_DECLS -ORTE_MODULE_DECLSPEC extern orte_sds_base_component_t mca_sds_lsf_component; +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_lsf_component; /* * Module open / close */ -int orte_sds_lsf_component_open(void); -int orte_sds_lsf_component_close(void); -orte_sds_base_module_t* orte_sds_lsf_component_init(int *priority); - -/* - * Startup / Shutdown - */ -int orte_sds_lsf_finalize(void); - -/* - * Module functions - */ -int orte_sds_lsf_set_name(void); +int orte_ess_lsf_component_open(void); +int orte_ess_lsf_component_close(void); +orte_ess_base_module_t* orte_ess_lsf_component_init(int *priority); END_C_DECLS -#endif /* ORTE_SDS_LSF_H */ +#endif /* ORTE_ESS_LSF_H */ diff --git a/orte/mca/sds/lsf/sds_lsf_component.c b/orte/mca/ess/lsf/ess_lsf_component.c similarity index 61% rename from orte/mca/sds/lsf/sds_lsf_component.c rename to orte/mca/ess/lsf/ess_lsf_component.c index 3d3facd99f..d376da9271 100644 --- a/orte/mca/sds/lsf/sds_lsf_component.c +++ b/orte/mca/ess/lsf/ess_lsf_component.c @@ -18,25 +18,28 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/lsf/sds_lsf.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_lsf_module; +#include "orte/util/proc_info.h" + +#include 
"orte/mca/ess/ess.h" +#include "orte/mca/ess/lsf/ess_lsf.h" + +extern orte_ess_base_module_t orte_ess_lsf_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_lsf_component = { +orte_ess_base_component_t mca_ess_lsf_component = { { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ "lsf", @@ -45,8 +48,8 @@ orte_sds_base_component_t mca_sds_lsf_component = { ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_lsf_component_open, - orte_sds_lsf_component_close + orte_ess_lsf_component_open, + orte_ess_lsf_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -56,40 +59,36 @@ orte_sds_base_component_t mca_sds_lsf_component = { }, /* Initialization / querying functions */ - orte_sds_lsf_component_init + orte_ess_lsf_component_init }; -int orte_sds_lsf_component_open(void) +int orte_ess_lsf_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t *orte_sds_lsf_component_init(int *priority) +orte_ess_base_module_t *orte_ess_lsf_component_init(int *priority) { - int id; - char *mode; - - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - mca_base_param_lookup_string(id, &mode); - - if (NULL == mode || 0 != strcmp("lsf", mode)) { - if (NULL != mode) { - free(mode); - } - return NULL; + /* Are we running under an LSF job? Were + * we given a path back to the HNP? 
If the + * answer to both is "yes", then we were launched + * by mpirun in an LSF world + */ + + if (NULL != getenv("LSB_JOBID") && + NULL != orte_process_info.my_hnp_uri) { + *priority = 40; + return &orte_ess_lsf_module; } - - if (NULL != mode) { - free(mode); - } - *priority = 20; - return &orte_sds_lsf_module; + + /* nope, not here */ + return NULL; } -int orte_sds_lsf_component_close(void) +int orte_ess_lsf_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c new file mode 100644 index 0000000000..051a7a9cbc --- /dev/null +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_STRING_H +#include <string.h> +#endif /* HAVE_STRING_H */ +#include + +#include + +#include "opal/util/argv.h" +#include "opal/util/opal_environ.h" + +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" +#include "opal/mca/base/mca_base_param.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/util/sys_info.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/lsf/ess_lsf.h" + +static int lsf_set_name(void); + +static int rte_init(char flags); +static int rte_finalize(void); + +orte_ess_base_module_t orte_ess_lsf_module = { + rte_init, + rte_finalize, + orte_ess_base_app_abort +}; + +/* + * Establish this process's name from the LSF environment, then run the + * base setup that matches its role (daemon, tool or application proc). + * + * The flags argument is not used by this module. + * + * Returns ORTE_SUCCESS, or the error code of the first step that failed + * after reporting the name of that step through opal_show_help(). + */ +static int rte_init(char flags) +{ + int ret; + char *error = NULL; + + /* Start by getting a unique name - the role-specific setup below + * must not run if that failed + */ + if (ORTE_SUCCESS != (ret = lsf_set_name())) { + ORTE_ERROR_LOG(ret); + error = "lsf_set_name"; + goto error; + } + + /* if I am a daemon, complete my setup using the + * default procedure + */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_orted_setup"; + goto error; + } + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_tool_setup"; + goto error; + } + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_app_setup"; + goto error; + } + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +/* + * Run the base finalize routine that matches this process's role and + * return its result; a failure is logged but not otherwise handled. + */ +static int rte_finalize(void) +{ + int ret; + + /* if I am a daemon, finalize using the default 
procedure */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { + ORTE_ERROR_LOG(ret); + } + } + + return ret; +} + +/* + * Fill in ORTE_PROC_MY_NAME. The name comes from the orte_ess_name + * parameter when it is set, otherwise from orte_ess_jobid and + * orte_ess_vpid; the vpid is then replaced by the LSF_PM_TASKID + * environment value. Finishes by calling orte_ess_env_get(). + * + * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when a required parameter or + * LSF_PM_TASKID is missing, or the error code of a failed conversion. + */ +static int lsf_set_name(void) +{ + int rc; + int id; + char* name_string = NULL; + char* lsf_taskid; + int lsf_nodeid; + + /* start by getting our jobid, and vpid (which is the + starting vpid for the list of daemons) */ + id = mca_base_param_register_string("orte", "ess", "name", NULL, NULL); + mca_base_param_lookup_string(id, &name_string); + + if (name_string != NULL) { + /* NOTE(review): ORTE_PROC_MY_NAME is dereferenced as a pointer + * below - confirm that passing its address here is intended + */ + if (ORTE_SUCCESS != + (rc = orte_util_convert_string_to_process_name(&ORTE_PROC_MY_NAME, name_string))) { + ORTE_ERROR_LOG(rc); + free(name_string); + return rc; + } + free(name_string); + } else { + orte_jobid_t jobid; + orte_vpid_t vpid; + char* jobid_string = NULL; + char* vpid_string = NULL; + + id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL); + mca_base_param_lookup_string(id, &jobid_string); + if (NULL == jobid_string) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + /* the looked-up string is ours to release, as with name_string */ + rc = orte_util_convert_string_to_jobid(&jobid, jobid_string); + free(jobid_string); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL); + mca_base_param_lookup_string(id, &vpid_string); + if (NULL == vpid_string) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + rc = orte_util_convert_string_to_vpid(&vpid, vpid_string); + free(vpid_string); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + ORTE_PROC_MY_NAME->jobid = jobid; + ORTE_PROC_MY_NAME->vpid = vpid; + } + + /* fix up the base name and make 
it the "real" name */ + lsf_taskid = getenv("LSF_PM_TASKID"); + if (NULL == lsf_taskid) { + /* getenv returns NULL when the variable is unset - never hand that to atoi */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + lsf_nodeid = atoi(lsf_taskid); + ORTE_PROC_MY_NAME->vpid = lsf_nodeid; + + /* get the non-name common environmental variables */ + if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { + ORTE_ERROR_LOG(rc); + return rc; + } + + return ORTE_SUCCESS; +} diff --git a/orte/mca/sds/portals_utcp/Makefile.am b/orte/mca/ess/portals_utcp/Makefile.am similarity index 64% rename from orte/mca/sds/portals_utcp/Makefile.am rename to orte/mca/ess/portals_utcp/Makefile.am index 39a47cb167..515aa3dc39 100644 --- a/orte/mca/sds/portals_utcp/Makefile.am +++ b/orte/mca/ess/portals_utcp/Makefile.am @@ -16,32 +16,32 @@ # $HEADER$ # -AM_CPPFLAGS = $(sds_portals_utcp_CPPFLAGS) +AM_CPPFLAGS = $(ess_portals_utcp_CPPFLAGS) sources = \ - sds_portals_utcp.h \ - sds_portals_utcp_component.c \ - sds_portals_utcp_module.c + ess_portals_utcp.h \ + ess_portals_utcp_component.c \ + ess_portals_utcp_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_sds_portals_utcp_DSO +if OMPI_BUILD_ess_portals_utcp_DSO component_noinst = -component_install = mca_sds_portals_utcp.la +component_install = mca_ess_portals_utcp.la else -component_noinst = libmca_sds_portals_utcp.la +component_noinst = libmca_ess_portals_utcp.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sds_portals_utcp_la_SOURCES = $(sources) -mca_sds_portals_utcp_la_LDFLAGS = -module -avoid-version $(sds_portals_utcp_LDFLAGS) -mca_sds_portals_utcp_la_LIBADD = $(sds_portals_utcp_LIBS) +mca_ess_portals_utcp_la_SOURCES = $(sources) +mca_ess_portals_utcp_la_LDFLAGS = -module -avoid-version $(ess_portals_utcp_LDFLAGS) +mca_ess_portals_utcp_la_LIBADD = $(ess_portals_utcp_LIBS) noinst_LTLIBRARIES = $(component_noinst) -libmca_sds_portals_utcp_la_SOURCES =$(sources) -libmca_sds_portals_utcp_la_LDFLAGS = -module -avoid-version $(sds_portals_utcp_LDFLAGS) -libmca_sds_portals_utcp_la_LIBADD = $(sds_portals_utcp_LIBS) +libmca_ess_portals_utcp_la_SOURCES =$(sources) +libmca_ess_portals_utcp_la_LDFLAGS = -module -avoid-version $(ess_portals_utcp_LDFLAGS) +libmca_ess_portals_utcp_la_LIBADD = $(ess_portals_utcp_LIBS) diff --git a/orte/mca/sds/portals_utcp/configure.m4 b/orte/mca/ess/portals_utcp/configure.m4 similarity index 70% rename from orte/mca/sds/portals_utcp/configure.m4 rename to orte/mca/ess/portals_utcp/configure.m4 index f1bfa1f8f0..585dcc9053 100644 --- a/orte/mca/sds/portals_utcp/configure.m4 +++ b/orte/mca/ess/portals_utcp/configure.m4 @@ -18,15 +18,15 @@ # -# MCA_sds_portals_utcp_CONFIG(action-if-can-compile, +# MCA_ess_portals_utcp_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ -AC_DEFUN([MCA_sds_portals_utcp_CONFIG],[ +AC_DEFUN([MCA_ess_portals_utcp_CONFIG],[ # save compiler flags so that we don't alter them for later # components. 
- sds_portals_utcp_save_CPPFLAGS="$CPPFLAGS" - sds_portals_utcp_save_LDFLAGS="$LDFLAGS" - sds_portals_utcp_save_LIBS="$LIBS" + ess_portals_utcp_save_CPPFLAGS="$CPPFLAGS" + ess_portals_utcp_save_LDFLAGS="$LDFLAGS" + ess_portals_utcp_save_LIBS="$LIBS" # allow user a way to say where the Portals installation is AC_ARG_WITH(portals, @@ -35,27 +35,27 @@ AC_DEFUN([MCA_sds_portals_utcp_CONFIG],[ AS_IF([test -n "$with_portals"], [AS_IF([test -d "$with_portals/include"], - [sds_portals_utcp_CPPFLAGS="-I$with_portals/include" - CPPFLAGS="$CPPFLAGS $sds_portals_utcp_CPPFLAGS"], []) + [ess_portals_utcp_CPPFLAGS="-I$with_portals/include" + CPPFLAGS="$CPPFLAGS $ess_portals_utcp_CPPFLAGS"], []) AS_IF([test -d "$with_portals/lib"], - [sds_portals_utcp_LDFLAGS="-L$with_portals/lib" - LDFLAGS="$LDFLAGS $sds_portals_utcp_LDFLAGS"], [])]) + [ess_portals_utcp_LDFLAGS="-L$with_portals/lib" + LDFLAGS="$LDFLAGS $ess_portals_utcp_LDFLAGS"], [])]) # Try to find all the portals libraries (this is not fun!) AC_ARG_WITH(portals-libs, AC_HELP_STRING([--with-portals-libs=LIBS], [Libraries to link with for portals])) if test -n "$with_portals_libs" ; then - sds_portals_utcp_LIBS="" + ess_portals_utcp_LIBS="" for lib in $with_portals_libs ; do - sds_portals_utcp_LIBS="$sds_portals_utcp_LIBS -l$lib" + ess_portals_utcp_LIBS="$ess_portals_utcp_LIBS -l$lib" done fi - sds_portals_utcp_LIBS="-lp3utcp -lp3api -lp3lib -lp3rt -lp3utcp" + ess_portals_utcp_LIBS="-lp3utcp -lp3api -lp3lib -lp3rt -lp3utcp" # check for portals - LIBS="$LIBS $sds_portals_utcp_LIBS" + LIBS="$LIBS $ess_portals_utcp_LIBS" AC_MSG_CHECKING([for PtlGetRank]) AC_LINK_IFELSE([AC_LANG_PROGRAM([#include #include @@ -76,17 +76,17 @@ PtlGetRank(PTL_INVALID_HANDLE, &rank, &nprocs);])], # we don't actually need the Portals code for this component, # so don't link against them... 
- sds_portals_utcp_CPPFLAGS= - sds_portals_utcp_LDFLAGS= - sds_portals_utcp_LIBS= + ess_portals_utcp_CPPFLAGS= + ess_portals_utcp_LDFLAGS= + ess_portals_utcp_LIBS= # substitute in the things needed to build Portals - AC_SUBST([sds_portals_utcp_CPPFLAGS]) - AC_SUBST([sds_portals_utcp_LDFLAGS]) - AC_SUBST([sds_portals_utcp_LIBS]) + AC_SUBST([ess_portals_utcp_CPPFLAGS]) + AC_SUBST([ess_portals_utcp_LDFLAGS]) + AC_SUBST([ess_portals_utcp_LIBS]) # reset the flags for the next test - CPPFLAGS="$sds_portals_utcp_save_CPPFLAGS" - LDFLAGS="$sds_portals_utcp_save_LDFLAGS" - LIBS="$sds_portals_utcp_save_LIBS" + CPPFLAGS="$ess_portals_utcp_save_CPPFLAGS" + LDFLAGS="$ess_portals_utcp_save_LDFLAGS" + LIBS="$ess_portals_utcp_save_LIBS" ])dnl diff --git a/orte/mca/gpr/null/configure.params b/orte/mca/ess/portals_utcp/configure.params similarity index 88% rename from orte/mca/gpr/null/configure.params rename to orte/mca/ess/portals_utcp/configure.params index feb84f3613..67e1119704 100644 --- a/orte/mca/gpr/null/configure.params +++ b/orte/mca/ess/portals_utcp/configure.params @@ -19,7 +19,9 @@ # $HEADER$ # -# Specific to this module - PARAM_CONFIG_FILES="Makefile" -PARAM_CONFIG_PRIORITY=0 +# +# Set the config priority so that, if we can build, +# only the portals component will build + +PARAM_CONFIG_PRIORITY=50 diff --git a/orte/mca/rds/base/rds_base_no_ops.c b/orte/mca/ess/portals_utcp/ess_portals_utcp.h similarity index 65% rename from orte/mca/rds/base/rds_base_no_ops.c rename to orte/mca/ess/portals_utcp/ess_portals_utcp.h index 6982dc5793..d2335ec74b 100644 --- a/orte/mca/rds/base/rds_base_no_ops.c +++ b/orte/mca/ess/portals_utcp/ess_portals_utcp.h @@ -5,35 +5,26 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** @file: - */ -/* - * includes - */ -#include "orte_config.h" -#include "orte/orte_constants.h" +#ifndef ORTE_ESS_PORTALS_UTCP_H +#define ORTE_ESS_PORTALS_UTCP_H -#include "opal/class/opal_list.h" +BEGIN_C_DECLS -#include "orte/mca/rds/base/rds_private.h" +int orte_ess_portals_utcp_component_open(void); +int orte_ess_portals_utcp_component_close(void); +orte_ess_base_module_t* orte_ess_portals_utcp_component_init(int *priority); -int orte_rds_base_no_op_query(orte_jobid_t job) -{ - return ORTE_ERR_NOT_SUPPORTED; -} +END_C_DECLS -int orte_rds_base_no_op_store_resource(opal_list_t *resources) -{ - return ORTE_ERR_NOT_SUPPORTED; -} +#endif /* ORTE_ESS_PORTALS_UTCP_H */ diff --git a/orte/mca/sds/portals_utcp/sds_portals_utcp_component.c b/orte/mca/ess/portals_utcp/ess_portals_utcp_component.c similarity index 66% rename from orte/mca/sds/portals_utcp/sds_portals_utcp_component.c rename to orte/mca/ess/portals_utcp/ess_portals_utcp_component.c index e4c7c8f85a..1a369ade6e 100644 --- a/orte/mca/sds/portals_utcp/sds_portals_utcp_component.c +++ b/orte/mca/ess/portals_utcp/ess_portals_utcp_component.c @@ -23,25 +23,26 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/portals_utcp/sds_portals_utcp.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_portals_utcp_module; +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/portals_utcp/ess_portals_utcp.h" + +extern orte_ess_base_module_t orte_ess_portals_utcp_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_portals_utcp_component = { +orte_ess_base_component_t mca_ess_portals_utcp_component 
= { /* First, the mca_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ "portals_utcp", @@ -50,8 +51,8 @@ orte_sds_base_component_t mca_sds_portals_utcp_component = { ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_portals_utcp_component_open, - orte_sds_portals_utcp_component_close + orte_ess_portals_utcp_component_open, + orte_ess_portals_utcp_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -61,41 +62,35 @@ orte_sds_base_component_t mca_sds_portals_utcp_component = { }, /* Initialization / querying functions */ - orte_sds_portals_utcp_component_init + orte_ess_portals_utcp_component_init }; int -orte_sds_portals_utcp_component_open(void) +orte_ess_portals_utcp_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_portals_utcp_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_portals_utcp_component_init(int *priority) { - int id; - char *mode; - - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - mca_base_param_lookup_string(id, &mode); - - /* if mode isn't NULL, then we have an ORTE starter. Don't use - this component */ - if (NULL != mode) { - free(mode); - return NULL; - } + /* since we are not launched by an ORTE launcher, + * we want to be selected ahead of the singleton + * component if we detect our supported environment. 
+ * So ensure that our priority is higher than + * the singleton's + */ if (NULL == getenv("PTL_MY_RID")) return NULL; *priority = 60; - return &orte_sds_portals_utcp_module; + return &orte_ess_portals_utcp_module; } int -orte_sds_portals_utcp_component_close(void) +orte_ess_portals_utcp_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c b/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c new file mode 100644 index 0000000000..a08b7ae833 --- /dev/null +++ b/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "opal/util/output.h" + +#include "orte/mca/errmgr/base/base.h" +#include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/portals_utcp/ess_portals_utcp.h" + +static int rte_init(char flags); +static int rte_finalize(void); +static void rte_abort(int status, bool report) __opal_attribute_noreturn__; + +orte_ess_base_module_t orte_ess_portals_utcp_module = { + rte_init, + rte_finalize, + rte_abort +}; + +static int rte_init(char flags) +{ + int rc, i, len, num_procs; + orte_vpid_t vpid; + char *vpid_string, *jobid_str; + char *nidmap_string; + + vpid_string = getenv("PTL_MY_RID"); + nidmap_string = getenv("PTL_NIDMAP"); + if (NULL == vpid_string || NULL == nidmap_string || + NULL == getenv("PTL_PIDMAP") || NULL == getenv("PTL_IFACE")) { + return ORTE_ERR_NOT_FOUND; + } + + /* Get our process information */ + + /* Procs in this environment are directly launched. Hence, there + * was no mpirun to create a jobid for us, and each app proc is + * going to have to fend for itself. For now, we assume that the + * jobid is some arbitrary number (say, 1). + */ + ORTE_PROC_MY_NAME->jobid = 1; /* not 0, since it has special meaning */ + + /* find our vpid assuming range starts at 0 */ + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, vpid_string))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + ORTE_PROC_MY_NAME->vpid = vpid; + + /* + * Get the number of procs in the job. We assume vpids start at 0. 
We + * assume that there are (count of ':' characters, plus one) procs, since the nidmap is a + * ':'-separated list of nids, and the utcp reference implementation + * assumes all will be present + */ + len = strlen(nidmap_string); + num_procs = 1; + for (i = 0 ; i < len ; ++i) { + if (nidmap_string[i] == ':') num_procs++; + } + + orte_process_info.num_procs = (orte_std_cntr_t) num_procs; + + /* MPI_Init needs the grpcomm framework, so we have to init it */ + if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_grpcomm_base_select())) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* we also want our session directory for shared memory support */ + if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&jobid_str, ORTE_PROC_MY_NAME->jobid))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid_string, ORTE_PROC_MY_NAME->vpid))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, + "%s setting up session dir with\n\ttmpdir: %s\n\tuser %s\n\thost %s\n\tjobid %s\n\tprocid %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, + orte_system_info.user, orte_system_info.nodename, jobid_str, vpid_string)); + + if (ORTE_SUCCESS != (rc = orte_session_dir(true, + orte_process_info.tmpdir_base, + orte_system_info.user, + orte_system_info.nodename, NULL, + jobid_str, vpid_string))) { + if (jobid_str != NULL) free(jobid_str); + if (vpid_string != NULL) free(vpid_string); + ORTE_ERROR_LOG(rc); + return rc; + } + if (NULL != jobid_str) { + free(jobid_str); + } + if (NULL != vpid_string) { + free(vpid_string); + } + + /* Once the session directory location has been established, set + the opal_output env file location to be in the + proc-specific session directory.
*/ + opal_output_set_output_file_info(orte_process_info.proc_session_dir, + "output-", NULL, NULL); + + /* that's all we need here */ + return ORTE_SUCCESS; +} + + +static int rte_finalize(void) +{ + /* just cleanup the things we used */ + orte_grpcomm_base_close(); + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* clean out the global structures */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + return ORTE_SUCCESS; +} + +static void rte_abort(int status, bool report) +{ + exit(status); +} diff --git a/orte/mca/ess/singleton/Makefile.am b/orte/mca/ess/singleton/Makefile.am new file mode 100644 index 0000000000..c0c1f65ceb --- /dev/null +++ b/orte/mca/ess/singleton/Makefile.am @@ -0,0 +1,43 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + ess_singleton.h \ + ess_singleton_component.c \ + ess_singleton_module.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if OMPI_BUILD_ess_singleton_DSO +component_noinst = +component_install = mca_ess_singleton.la +else +component_noinst = libmca_ess_singleton.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_ess_singleton_la_SOURCES = $(sources) +mca_ess_singleton_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_ess_singleton_la_SOURCES =$(sources) +libmca_ess_singleton_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/ess/singleton/configure.m4 b/orte/mca/ess/singleton/configure.m4 new file mode 100644 index 0000000000..84babc1d02 --- /dev/null +++ b/orte/mca/ess/singleton/configure.m4 @@ -0,0 +1,13 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Sandia National Laboratories. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ess_singleton_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_ess_singleton_CONFIG], [$1]) diff --git a/orte/mca/ess/singleton/configure.params b/orte/mca/ess/singleton/configure.params new file mode 100644 index 0000000000..2cfa5b69ee --- /dev/null +++ b/orte/mca/ess/singleton/configure.params @@ -0,0 +1,28 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that the singleton +# component will build for all environs -except- +# those special ones that do not support it + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/ess/singleton/ess_singleton.h b/orte/mca/ess/singleton/ess_singleton.h new file mode 100644 index 0000000000..1fd601afd1 --- /dev/null +++ b/orte/mca/ess/singleton/ess_singleton.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_ESS_SINGLETON_H +#define ORTE_ESS_SINGLETON_H + +BEGIN_C_DECLS + +/* + * Module open / close + */ +int orte_ess_singleton_component_open(void); +int orte_ess_singleton_component_close(void); +orte_ess_base_module_t* orte_ess_singleton_component_init(int *priority); + +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_singleton_component; + +END_C_DECLS + +#endif /* ORTE_ESS_SINGLETON_H */ diff --git a/orte/mca/sds/bproc/sds_bproc_component.c b/orte/mca/ess/singleton/ess_singleton_component.c similarity index 55% rename from orte/mca/sds/bproc/sds_bproc_component.c rename to orte/mca/ess/singleton/ess_singleton_component.c index 1cd21489aa..0ae435fcf7 100644 --- a/orte/mca/sds/bproc/sds_bproc_component.c +++ b/orte/mca/ess/singleton/ess_singleton_component.c @@ -23,35 +23,38 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/bproc/sds_bproc.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_bproc_module; +#include "orte/util/proc_info.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/singleton/ess_singleton.h" + +extern orte_ess_base_module_t orte_ess_singleton_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_bproc_component = { +orte_ess_base_component_t mca_ess_singleton_component = { /* First, the mca_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ - "bproc", + "singleton", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, 
ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_bproc_component_open, - orte_sds_bproc_component_close + orte_ess_singleton_component_open, + orte_ess_singleton_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -61,44 +64,52 @@ orte_sds_base_component_t mca_sds_bproc_component = { }, /* Initialization / querying functions */ - orte_sds_bproc_component_init + orte_ess_singleton_component_init }; int -orte_sds_bproc_component_open(void) +orte_ess_singleton_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_bproc_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_singleton_component_init(int *priority) { - int id; - char *mode; - - /* okay, not seed/singleton attempt another approach */ - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - mca_base_param_lookup_string(id, &mode); - - if (NULL == mode || 0 != strcmp("bproc", mode)) { - if(NULL != mode) { - free(mode); - } - return NULL; + /* if we are an HNP, daemon, or tool, then we + * are definitely not a singleton! + */ + if (orte_process_info.hnp || + orte_process_info.daemon || + orte_process_info.tool) { + return NULL; } - - if(NULL != mode) { - free(mode); + + /* okay, we still could be a singleton or + * an application process. If we have been + * given an HNP URI, then we are definitely + * not a singleton + */ + if (NULL != orte_process_info.my_hnp_uri) { + return NULL; } - *priority = 20; - return &orte_sds_bproc_module; + + /* okay, we could still be an application process, + * but launched in "standalone" mode - i.e., directly + * launched by an environment instead of via mpirun. + * We need to set our priority low so that any enviro + * component will override us. 
If they don't, then we + * want to be selected as we must be a singleton + */ + *priority = 25; + return &orte_ess_singleton_module; } int -orte_sds_bproc_component_close(void) +orte_ess_singleton_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c new file mode 100644 index 0000000000..ad308c4377 --- /dev/null +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#include "opal/util/argv.h" +#include "opal/util/output.h" +#include "opal/util/path.h" +#include "opal/util/show_help.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/installdirs/installdirs.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/iof/iof.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/routed/routed.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/singleton/ess_singleton.h" + +static int fork_hnp(void); + +static void set_handler_default(int sig) +{ +#if !defined(__WINDOWS__) + struct sigaction act; + 
+ act.sa_handler = SIG_DFL; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + sigaction(sig, &act, (struct sigaction *)0); +#endif /* !defined(__WINDOWS__) */ +} + +static int rte_init(char flags); + +orte_ess_base_module_t orte_ess_singleton_module = { + rte_init, + orte_ess_base_app_finalize, + orte_ess_base_app_abort +}; + +static int rte_init(char flags) +{ + int rc; + + /* + * If we are the selected module, then we must be a singleton + * as it means that no other method for discovering a name + * could be found. In this case, we need to start a daemon that + * can support our operation. We must do this for two reasons: + * + * (1) if we try to play the role of the HNP, then any child processes + * we might start via comm_spawn will rely on us for all ORTE-level + * support. However, we can only progress those requests when + * the application calls into the OMPI/ORTE library! Thus, if this + * singleton just does computation, the other processes will "hang" + * in any calls into the ORTE layer that communicate with the HNP - + * and most calls on application procs *do*. + * + * (2) daemons are used to communicate messages for administrative + * purposes in a broadcast-like manner. Thus, daemons are expected + * to be able to interpret specific commands. Our application process + * doesn't have any idea how to handle those commands, thus causing + * the entire ORTE administrative system to break down. + * + * For those reasons, we choose to fork/exec a daemon at this time + * and then reconnect ourselves to it. We could just "fork" and declare + * the child to be a daemon, but that would require we place *all* of the + * daemon command processing code in the ORTE library, do some strange + * mojo in a few places, etc. This doesn't seem worth it, so we'll just + * do the old fork/exec here + * + * Note that Windows-based systems have to do their own special trick as + * they don't support fork/exec.
So we have to use a giant "if" here to + * protect the Windows world. To make the results more readable, we put + * the whole mess in a separate function below + */ + if (ORTE_SUCCESS != (rc= fork_hnp())) { + /* if this didn't work, then we cannot support operation any further. + * Abort the system and tell orte_init to exit + */ + ORTE_ERROR_LOG(rc); + return rc; + } + + orte_process_info.num_procs = 1; + /* since we are a singleton, then we must have a local_rank of 0 + * and only 1 local process + */ + orte_process_info.local_rank = 0; + orte_process_info.num_local_procs = 1; + + /* use the std app init to complete the procedure */ + if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup())) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* wireup our io */ + if (ORTE_SUCCESS != (rc = orte_iof.iof_pull(ORTE_PROC_MY_NAME, ORTE_NS_CMP_JOBID, ORTE_IOF_STDOUT, 1))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_iof.iof_pull(ORTE_PROC_MY_NAME, ORTE_NS_CMP_JOBID, ORTE_IOF_STDERR, 2))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_iof.iof_push(ORTE_PROC_MY_NAME, ORTE_NS_CMP_JOBID, ORTE_IOF_STDIN, 0))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + return ORTE_SUCCESS; +} + + +#define ORTE_URI_MSG_LGTH 256 + +static int fork_hnp(void) +{ +#if !defined(__WINDOWS__) + int p[2], death_pipe[2]; + char *cmd; + char **argv = NULL; + int argc; + char *param; + sigset_t sigs; + int buffer_length, num_chars_read, chunk; + char *orted_uri; + int rc; + + /* A pipe is used to communicate between the parent and child to + indicate whether the exec ultimately succeeded or failed. The + child sets the pipe to be close-on-exec; the child only ever + writes anything to the pipe if there is an error (e.g., + executable not found, exec() fails, etc.). The parent does a + blocking read on the pipe; if the pipe closed with no data, + then the exec() succeeded.
If the parent reads something from + the pipe, then the child was letting us know that it failed. + */ + if (pipe(p) < 0) { + ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES); + return ORTE_ERR_SYS_LIMITS_PIPES; + } + + /* we also have to give the HNP a pipe it can watch to know when + * we terminated. Since the HNP is going to be a child of us, it + * can't just use waitpid to see when we leave - so it will watch + * the pipe instead + */ + if (pipe(death_pipe) < 0) { + ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES); + return ORTE_ERR_SYS_LIMITS_PIPES; + } + + /* find the orted binary using the install_dirs support - this also + * checks to ensure that we can see this executable and it *is* executable by us + */ + cmd = opal_path_access("orted", opal_install_dirs.bindir, X_OK); + if (NULL == cmd) { + /* guess we couldn't do it - best to abort */ + ORTE_ERROR_LOG(ORTE_ERR_FILE_NOT_EXECUTABLE); + close(p[0]); + close(p[1]); + return ORTE_ERR_FILE_NOT_EXECUTABLE; + } + + /* okay, setup an appropriate argv */ + opal_argv_append(&argc, &argv, "orted"); + + /* tell the daemon it is to be the HNP */ + opal_argv_append(&argc, &argv, "--hnp"); + + /* tell the daemon to get out of our process group */ + opal_argv_append(&argc, &argv, "--set-sid"); + + /* tell the daemon to report back its uri so we can connect to it */ + opal_argv_append(&argc, &argv, "--report-uri"); + asprintf(&param, "%d", p[1]); + opal_argv_append(&argc, &argv, param); + free(param); + + /* give the daemon a pipe it can watch to tell when we have died */ + opal_argv_append(&argc, &argv, "--singleton-died-pipe"); + asprintf(&param, "%d", death_pipe[0]); + opal_argv_append(&argc, &argv, param); + free(param); + + /* add any debug flags */ + if (orte_debug_flag) { + opal_argv_append(&argc, &argv, "--debug"); + } + + if (orte_debug_daemons_flag) { + opal_argv_append(&argc, &argv, "--debug-daemons"); + } + + if (orte_debug_daemons_file_flag) { + if (!orte_debug_daemons_flag) { + opal_argv_append(&argc, &argv,
"--debug-daemons"); + } + opal_argv_append(&argc, &argv, "--debug-daemons-file"); + } + + /* Fork off the child */ + orte_process_info.hnp_pid = fork(); + if(orte_process_info.hnp_pid < 0) { + ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN); + close(p[0]); + close(p[1]); + close(death_pipe[0]); + close(death_pipe[1]); + free(cmd); + return ORTE_ERR_SYS_LIMITS_CHILDREN; + } + + if (orte_process_info.hnp_pid == 0) { + close(p[0]); + close(death_pipe[1]); + /* I am the child - exec me */ + + /* Set signal handlers back to the default. Do this close + to the execve() because the event library may (and likely + will) reset them. If we don't do this, the event + library may have left some set that, at least on some + OS's, don't get reset via fork() or exec(). Hence, the + orted could be unkillable (for example). */ + set_handler_default(SIGTERM); + set_handler_default(SIGINT); + set_handler_default(SIGHUP); + set_handler_default(SIGPIPE); + set_handler_default(SIGCHLD); + + /* Unblock all signals, for many of the same reasons that + we set the default handlers, above. This is noticable + on Linux where the event library blocks SIGTERM, but we + don't want that blocked by the orted (or, more + specifically, we don't want it to be blocked by the + orted and then inherited by the ORTE processes that it + forks, making them unkillable by SIGTERM). */ + sigprocmask(0, 0, &sigs); + sigprocmask(SIG_UNBLOCK, &sigs, 0); + + execv(cmd, argv); + + /* if I get here, the execv failed! 
*/ + opal_show_help("help-ess-base.txt", "ess-base:execv-error", + true, cmd, strerror(errno)); + exit(1); + + } else { + /* I am the parent - wait to hear something back and + * report results + */ + close(p[1]); /* parent closes the write - orted will write its contact info to it*/ + close(death_pipe[0]); /* parent closes the death_pipe's read */ + + /* setup the buffer to read the name + uri */ + buffer_length = ORTE_URI_MSG_LGTH; + chunk = ORTE_URI_MSG_LGTH-1; + num_chars_read = 0; + orted_uri = (char*)malloc(buffer_length); + + while (chunk == (rc = read(p[0], &orted_uri[num_chars_read], chunk))) { + /* we read an entire buffer - better get more */ + num_chars_read += chunk; + buffer_length += ORTE_URI_MSG_LGTH; + orted_uri = realloc((void*)orted_uri, buffer_length); + } + num_chars_read += rc; + + if (num_chars_read <= 0) { + /* we didn't get anything back - this is bad */ + ORTE_ERROR_LOG(ORTE_ERR_HNP_COULD_NOT_START); + free(orted_uri); + return ORTE_ERR_HNP_COULD_NOT_START; + } + + /* parse the name from the returned info */ + if (']' != orted_uri[strlen(orted_uri)-1]) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + free(orted_uri); + return ORTE_ERR_COMM_FAILURE; + } + orted_uri[strlen(orted_uri)-1] = '\0'; + if (NULL == (param = strrchr(orted_uri, '['))) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + free(orted_uri); + return ORTE_ERR_COMM_FAILURE; + } + *param = '\0'; /* terminate the string */ + param++; + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_process_name(ORTE_PROC_MY_NAME, param))) { + ORTE_ERROR_LOG(rc); + free(orted_uri); + return rc; + } + /* save the daemon uri - we will process it later */ + orte_process_info.my_daemon_uri = strdup(orted_uri); + + /* likewise, since this is also the HNP, set that uri too */ + orte_process_info.my_hnp_uri = strdup(orted_uri); + + /* indicate we are a singleton so orte_init knows what to do */ + orte_process_info.singleton = true; + /* all done - report success */ + free(orted_uri); + return 
ORTE_SUCCESS; + } +#else + /* someone will have to devise a Windows equivalent */ +#endif + + return ORTE_SUCCESS; +} diff --git a/orte/mca/rml/cnos/Makefile.am b/orte/mca/ess/slurm/Makefile.am similarity index 74% rename from orte/mca/rml/cnos/Makefile.am rename to orte/mca/ess/slurm/Makefile.am index 3787a003c6..71f8d25a8b 100644 --- a/orte/mca/rml/cnos/Makefile.am +++ b/orte/mca/ess/slurm/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - rml_cnos.h \ - rml_cnos.c + ess_slurm.h \ + ess_slurm_component.c \ + ess_slurm_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). -if OMPI_BUILD_rml_cnos_DSO +if OMPI_BUILD_ess_slurm_DSO component_noinst = -component_install = mca_rml_cnos.la +component_install = mca_ess_slurm.la else -component_noinst = libmca_rml_cnos.la +component_noinst = libmca_ess_slurm.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_rml_cnos_la_SOURCES = $(sources) -mca_rml_cnos_la_LDFLAGS = -module -avoid-version +mca_ess_slurm_la_SOURCES = $(sources) +mca_ess_slurm_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_rml_cnos_la_SOURCES = $(sources) -libmca_rml_cnos_la_LDFLAGS = -module -avoid-version - +libmca_ess_slurm_la_SOURCES =$(sources) +libmca_ess_slurm_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sds/slurm/configure.m4 b/orte/mca/ess/slurm/configure.m4 similarity index 69% rename from orte/mca/sds/slurm/configure.m4 rename to orte/mca/ess/slurm/configure.m4 index 6a14889ddf..8824d11885 100644 --- a/orte/mca/sds/slurm/configure.m4 +++ b/orte/mca/ess/slurm/configure.m4 @@ -17,21 +17,21 @@ # $HEADER$ # -# MCA_sds_slurm_CONFIG([action-if-found], [action-if-not-found]) +# MCA_ess_slurm_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_sds_slurm_CONFIG],[ - 
OMPI_CHECK_SLURM([sds_slurm], [sds_slurm_good=1], [sds_slurm_good=0]) +AC_DEFUN([MCA_ess_slurm_CONFIG],[ + OMPI_CHECK_SLURM([ess_slurm], [ess_slurm_good=1], [ess_slurm_good=0]) # if check worked, set wrapper flags if so. # Evaluate succeed / fail - AS_IF([test "$sds_slurm_good" = "1"], - [sds_slurm_WRAPPER_EXTRA_LDFLAGS="$sds_slurm_LDFLAGS" - sds_slurm_WRAPPER_EXTRA_LIBS="$sds_slurm_LIBS" + AS_IF([test "$ess_slurm_good" = "1"], + [ess_slurm_WRAPPER_EXTRA_LDFLAGS="$ess_slurm_LDFLAGS" + ess_slurm_WRAPPER_EXTRA_LIBS="$ess_slurm_LIBS" $1], [$2]) # set build flags to use in makefile - AC_SUBST([sds_slurm_CPPFLAGS]) - AC_SUBST([sds_slurm_LDFLAGS]) - AC_SUBST([sds_slurm_LIBS]) + AC_SUBST([ess_slurm_CPPFLAGS]) + AC_SUBST([ess_slurm_LDFLAGS]) + AC_SUBST([ess_slurm_LIBS]) ])dnl diff --git a/orte/mca/ess/slurm/configure.params b/orte/mca/ess/slurm/configure.params new file mode 100644 index 0000000000..08f3f59a30 --- /dev/null +++ b/orte/mca/ess/slurm/configure.params @@ -0,0 +1,27 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that, if we can build, +# all the SLURM and supporting components will build + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/ras/xgrid/src/ras_xgrid.h b/orte/mca/ess/slurm/ess_slurm.h similarity index 59% rename from orte/mca/ras/xgrid/src/ras_xgrid.h rename to orte/mca/ess/slurm/ess_slurm.h index 628efdfb70..7d9ca19fb6 100644 --- a/orte/mca/ras/xgrid/src/ras_xgrid.h +++ b/orte/mca/ess/slurm/ess_slurm.h @@ -15,26 +15,32 @@ * * $HEADER$ */ -/** - * @file - * - * Resource Allocation (xgrid) + +#ifndef ORTE_ESS_SLURM_H +#define ORTE_ESS_SLURM_H + +BEGIN_C_DECLS + +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_slurm_component; + +/* + * Module open / close */ -#ifndef ORTE_RAS_xgrid_H -#define ORTE_RAS_xgrid_H +int orte_ess_slurm_component_open(void); +int orte_ess_slurm_component_close(void); +orte_ess_base_module_t* orte_ess_slurm_component_init(int *priority); -#include "orte/mca/ras/ras.h" -#include "orte/mca/ras/base/base.h" +/* + * Startup / Shutdown + */ +int orte_ess_slurm_finalize(void); -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +/* + * Module functions + */ +int orte_ess_slurm_set_name(void); - ORTE_DECLSPEC extern orte_ras_base_component_t mca_ras_xgrid_component; - ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_xgrid_module; -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS -#endif +#endif /* ORTE_ESS_SLURM_H */ diff --git a/orte/mca/sds/pipe/sds_pipe_component.c b/orte/mca/ess/slurm/ess_slurm_component.c similarity index 61% rename from orte/mca/sds/pipe/sds_pipe_component.c rename to orte/mca/ess/slurm/ess_slurm_component.c index 85806e4808..dc6083cea6 100644 --- a/orte/mca/sds/pipe/sds_pipe_component.c +++ b/orte/mca/ess/slurm/ess_slurm_component.c @@ -23,35 +23,36 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include 
"orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/pipe/sds_pipe.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_pipe_module; +#include "orte/util/proc_info.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/slurm/ess_slurm.h" + +extern orte_ess_base_module_t orte_ess_slurm_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_pipe_component = { - /* First, the mca_component_t struct containing meta information - about the component itself */ +orte_ess_base_component_t mca_ess_slurm_component = { { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ - "pipe", + "slurm", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_pipe_component_open, - orte_sds_pipe_component_close + orte_ess_slurm_component_open, + orte_ess_slurm_component_close }, /* Next the MCA v1.0.0 component meta data */ @@ -61,44 +62,40 @@ orte_sds_base_component_t mca_sds_pipe_component = { }, /* Initialization / querying functions */ - orte_sds_pipe_component_init + orte_ess_slurm_component_init }; int -orte_sds_pipe_component_open(void) +orte_ess_slurm_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_pipe_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_slurm_component_init(int *priority) { - int id; - char *mode; - - /* okay, not seed/singleton attempt another approach */ - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - mca_base_param_lookup_string(id, &mode); - - if (NULL == mode || 0 != strcmp("pipe", mode)) { - if(NULL != mode) { - free(mode); - } - return NULL; + /* Are we running 
under a SLURM job? Were + * we given a path back to the HNP? If the + * answer to both is "yes", then we were launched + * by mpirun in a slurm world + */ + + if (NULL != getenv("SLURM_JOBID") && + NULL != orte_process_info.my_hnp_uri) { + *priority = 30; + return &orte_ess_slurm_module; } - - if(NULL != mode) { - free(mode); - } - *priority = 20; - return &orte_sds_pipe_module; + + /* Sadly, no */ + + return NULL; } int -orte_sds_pipe_component_close(void) +orte_ess_slurm_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c new file mode 100644 index 0000000000..91a433650f --- /dev/null +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_STRING_H +#include +#endif /* HAVE_STRING_H */ +#include + +#include "opal/util/opal_environ.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/util/argv.h" +#include "opal/util/show_help.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/util/sys_info.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/slurm/ess_slurm.h" + +static char *get_slurm_nodename(int nodeid); +static int slurm_set_name(void); + +static int rte_init(char flags); +static int rte_finalize(void); + + +orte_ess_base_module_t orte_ess_slurm_module = { + rte_init, + rte_finalize, + orte_ess_base_app_abort +}; + + +static int rte_init(char flags) +{ + int ret; + char *error = NULL; + + /* Start by getting a unique name */ + slurm_set_name(); + + /* if I am a daemon, complete my setup using the + * default procedure + */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_orted_setup"; + goto error; + } + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_tool_setup"; + goto error; + } + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_app_setup"; + goto error; + } + } + + return ORTE_SUCCESS; + +error: + opal_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + 
+ return ret; +} + +static int rte_finalize(void) +{ + int ret; + + /* if I am a daemon, finalize using the default procedure */ + if (orte_process_info.daemon) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else if (orte_process_info.tool) { + /* otherwise, if I am a tool proc, use that procedure */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { + ORTE_ERROR_LOG(ret); + } + } else { + /* otherwise, I must be an application process, so + * use that default procedure + */ + if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { + ORTE_ERROR_LOG(ret); + } + } + + return ret; +} + +static int slurm_set_name(void) +{ + int slurm_nodeid; + int rc; + int id; + orte_jobid_t jobid; + orte_vpid_t vpid; + char* jobid_string; + char* vpid_string; + + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, + "ess:slurm setting name")); + + id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL); + mca_base_param_lookup_string(id, &jobid_string); + if (NULL == jobid_string) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, jobid_string))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL); + mca_base_param_lookup_string(id, &vpid_string); + if (NULL == vpid_string) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, vpid_string))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + ORTE_PROC_MY_NAME->jobid = jobid; + + /* fix up the vpid and make it the "real" vpid */ + slurm_nodeid = atoi(getenv("SLURM_NODEID")); + ORTE_PROC_MY_NAME->vpid = vpid + slurm_nodeid; + + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, + "ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* fix up the system info nodename to match exactly what slurm returned */ + if 
(NULL != orte_system_info.nodename) { + free(orte_system_info.nodename); + } + orte_system_info.nodename = get_slurm_nodename(slurm_nodeid); + + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, + "ess:slurm set nodename to %s", + orte_system_info.nodename)); + + /* get the non-name common environmental variables */ + if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { + ORTE_ERROR_LOG(rc); + return rc; + } + + return ORTE_SUCCESS; +} + +static char * +get_slurm_nodename(int nodeid) +{ + char **names = NULL; + char *slurm_nodelist; + char *ret; + + slurm_nodelist = getenv("OMPI_MCA_orte_slurm_nodelist"); + + if (NULL == slurm_nodelist) { + return NULL; + } + + /* split the node list into an argv array */ + names = opal_argv_split(slurm_nodelist, ','); + if (NULL == names) { /* got an error */ + return NULL; + } + + /* check to see if there are enough entries */ + if (nodeid > opal_argv_count(names)) { + return NULL; + } + + ret = strdup(names[nodeid]); + + opal_argv_free(names); + + /* All done */ + return ret; +} diff --git a/orte/mca/gpr/null/Makefile.am b/orte/mca/ess/tool/Makefile.am similarity index 74% rename from orte/mca/gpr/null/Makefile.am rename to orte/mca/ess/tool/Makefile.am index 9281dd9de9..f4c248db95 100644 --- a/orte/mca/gpr/null/Makefile.am +++ b/orte/mca/ess/tool/Makefile.am @@ -17,27 +17,27 @@ # sources = \ - gpr_null_component.c \ - gpr_null.c \ - gpr_null.h + ess_tool.h \ + ess_tool_component.c \ + ess_tool_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_gpr_null_DSO +if OMPI_BUILD_ess_tool_DSO component_noinst = -component_install = mca_gpr_null.la +component_install = mca_ess_tool.la else -component_noinst = libmca_gpr_null.la +component_noinst = libmca_ess_tool.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_gpr_null_la_SOURCES = $(sources) -mca_gpr_null_la_LDFLAGS = -module -avoid-version +mca_ess_tool_la_SOURCES = $(sources) +mca_ess_tool_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_gpr_null_la_SOURCES =$(sources) -libmca_gpr_null_la_LDFLAGS = -module -avoid-version +libmca_ess_tool_la_SOURCES =$(sources) +libmca_ess_tool_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/ess/tool/configure.m4 b/orte/mca/ess/tool/configure.m4 new file mode 100644 index 0000000000..d847d537f6 --- /dev/null +++ b/orte/mca/ess/tool/configure.m4 @@ -0,0 +1,13 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Sandia National Laboratories. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ess_tool_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_ess_tool_CONFIG], [$1]) diff --git a/orte/mca/errmgr/proxy/configure.params b/orte/mca/ess/tool/configure.params similarity index 89% rename from orte/mca/errmgr/proxy/configure.params rename to orte/mca/ess/tool/configure.params index 3513f8d956..c410dd1eaf 100644 --- a/orte/mca/errmgr/proxy/configure.params +++ b/orte/mca/ess/tool/configure.params @@ -19,6 +19,9 @@ # $HEADER$ # -# Specific to this module - PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that we build +# whenever someone using daemons does + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/ess/tool/ess_tool.h b/orte/mca/ess/tool/ess_tool.h new file mode 100644 index 0000000000..de1a5244f5 --- /dev/null +++ b/orte/mca/ess/tool/ess_tool.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_ESS_TOOL_H +#define ORTE_ESS_TOOL_H + +BEGIN_C_DECLS + +/* + * Module open / close + */ +int orte_ess_tool_component_open(void); +int orte_ess_tool_component_close(void); +orte_ess_base_module_t* orte_ess_tool_component_init(int *priority); + + +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_tool_component; + +END_C_DECLS + +#endif /* ORTE_ESS_TOOL_H */ diff --git a/orte/mca/sds/env/sds_env_component.c b/orte/mca/ess/tool/ess_tool_component.c similarity index 64% rename from orte/mca/sds/env/sds_env_component.c rename to orte/mca/ess/tool/ess_tool_component.c index 34d7713f3f..2589eeda8e 100644 --- a/orte/mca/sds/env/sds_env_component.c +++ b/orte/mca/ess/tool/ess_tool_component.c @@ -23,33 +23,36 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" -#include "orte/mca/sds/sds.h" -#include "orte/mca/sds/env/sds_env.h" #include "opal/mca/base/mca_base_param.h" -extern orte_sds_base_module_t orte_sds_env_module; +#include "orte/util/proc_info.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/tool/ess_tool.h" + +extern orte_ess_base_module_t orte_ess_tool_module; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_sds_base_component_t mca_sds_env_component = { +orte_ess_base_component_t mca_ess_tool_component = { { - /* Indicate that we are a sds v1.0.0 component (which also + /* Indicate that we are a ess v1.0.0 component (which also implies a specific MCA version) */ - ORTE_SDS_BASE_VERSION_1_0_0, + ORTE_ESS_BASE_VERSION_1_0_0, /* Component name and version */ - "env", + "tool", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_sds_env_component_open, - orte_sds_env_component_close + orte_ess_tool_component_open, + orte_ess_tool_component_close }, /* Next the MCA v1.0.0 
component meta data */ @@ -59,45 +62,39 @@ orte_sds_base_component_t mca_sds_env_component = { }, /* Initialization / querying functions */ - orte_sds_env_component_init + orte_ess_tool_component_init }; int -orte_sds_env_component_open(void) +orte_ess_tool_component_open(void) { return ORTE_SUCCESS; } -orte_sds_base_module_t * -orte_sds_env_component_init(int *priority) +orte_ess_base_module_t * +orte_ess_tool_component_init(int *priority) { - int id; - char *mode; - /* okay, not seed/singleton attempt another approach */ - id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL); - mca_base_param_lookup_string(id, &mode); - - if (NULL == mode || 0 != strcmp("env", mode)) { - if(NULL != mode) { - free(mode); - } - return NULL; + /* if we are a tool, we want to be selected + * UNLESS some enviro-specific component takes + * precedence. This would happen, for example, + * if the tool is a distributed set of processes + */ + if (orte_process_info.tool) { + *priority = 10; + return &orte_ess_tool_module; } - if(NULL != mode) { - free(mode); - } - - *priority = 20; - return &orte_sds_env_module; + /* else, don't */ + *priority = -1; + return NULL; } int -orte_sds_env_component_close(void) +orte_ess_tool_component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/tool/ess_tool_module.c b/orte/mca/ess/tool/ess_tool_module.c new file mode 100644 index 0000000000..d8ca69712b --- /dev/null +++ b/orte/mca/ess/tool/ess_tool_module.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. 
+ * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/runtime/opal_cr.h" + +#include "orte/mca/plm/base/base.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/runtime/orte_cr.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/tool/ess_tool.h" + +static int rte_init(char flags); +static void rte_abort(int status, bool report) __opal_attribute_noreturn__; + + +orte_ess_base_module_t orte_ess_tool_module = { + rte_init, + orte_ess_base_tool_finalize, + rte_abort +}; + + +static int rte_init(char flags) +{ + int ret; + char *error = NULL; + + /* If we are a tool with no name, then responsibility for + * defining the name falls to the PLM component for our + * respective environment - hence, we have to open the PLM + * first and select that component. Note that ONLY the + * HNP ever uses a PLM component, so we ONLY use the PLM + * here to set our name and then close it + * + * NOTE: Tools with names - i.e., tools consisting of a + * distributed set of processes - will select and use + * the appropriate enviro-specific module and -not- this one! 
+ */ + if (ORTE_SUCCESS != (ret = orte_plm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_select"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_set_hnp_name"; + goto error; + } + /* close the plm since we opened it to set our + * name, but have no further use for it + */ + orte_plm_base_close(); + + /* do the rest of the standard tool init */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_tool_setup"; + goto error; + } + + /* All done */ + orte_initialized = true; + return ORTE_SUCCESS; + +error: + opal_show_help("help-ess-tool.txt", + "tool:rte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + + return ret; +} + +/* + * If we are a tool-without-name, then we look just like the HNP. + * In that scenario, it could be beneficial to get a core file, so + * we call abort. + */ +static void rte_abort(int status, bool report) +{ + /* do NOT do a normal finalize as this will very likely + * hang the process. We are aborting due to an abnormal condition + * that precludes normal cleanup + * + * We do need to do the following bits to make sure we leave a + * clean environment. Taken from orte_finalize(): + * - Assume errmgr cleans up child processes before we exit. 
+ */ + + /* CRS cleanup since it may have a named pipe and thread active */ + orte_cr_finalize(); + + /* - Clean out the global structures + * (not really necessary, but good practice) + */ + orte_sys_info_finalize(); + orte_proc_info_finalize(); + + /* Now abort */ + abort(); +} + diff --git a/orte/mca/filem/base/Makefile.am b/orte/mca/filem/base/Makefile.am index 2e8c7bb9c8..d65df11e60 100644 --- a/orte/mca/filem/base/Makefile.am +++ b/orte/mca/filem/base/Makefile.am @@ -25,4 +25,5 @@ libmca_filem_la_SOURCES += \ base/filem_base_open.c \ base/filem_base_close.c \ base/filem_base_select.c \ + base/filem_base_receive.c \ base/filem_base_fns.c diff --git a/orte/mca/filem/base/base.h b/orte/mca/filem/base/base.h index e96493bde0..404b61f5dd 100644 --- a/orte/mca/filem/base/base.h +++ b/orte/mca/filem/base/base.h @@ -21,7 +21,7 @@ #include "orte_config.h" #include "orte/mca/rml/rml.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "orte/mca/filem/filem.h" @@ -29,9 +29,15 @@ * Global functions for MCA overall FILEM */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS + +/* + * cmds for base receive + */ +typedef uint8_t orte_filem_cmd_flag_t; +#define ORTE_FILEM_CMD OPAL_UINT8 +#define ORTE_FILEM_GET_PROC_NODE_NAME_CMD 1 +#define ORTE_FILEM_GET_REMOTE_PATH_CMD 2 /** * FileM request object maintenance functions @@ -106,27 +112,25 @@ extern "C" { /** * Some utility functions */ - ORTE_DECLSPEC int orte_filem_base_listener_init(orte_rml_buffer_callback_fn_t rml_cbfunc); - ORTE_DECLSPEC int orte_filem_base_listener_cancel(void); + /* base comm functions */ + ORTE_DECLSPEC int orte_filem_base_comm_start(void); + ORTE_DECLSPEC int orte_filem_base_comm_stop(void); + ORTE_DECLSPEC void orte_filem_base_recv(int status, orte_process_name_t* sender, + opal_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata); + /** * Get Node Name for an ORTE process */ ORTE_DECLSPEC int orte_filem_base_get_proc_node_name(orte_process_name_t 
*proc, char **machine_name); - ORTE_DECLSPEC int orte_filem_base_query_remote_path(char **remote_ref, orte_process_name_t *peer, int *flag); - ORTE_DECLSPEC void orte_filem_base_query_callback(int status, - orte_process_name_t* peer, - orte_buffer_t *buffer, - orte_rml_tag_t tag, - void* cbdata); + ORTE_DECLSPEC int orte_filem_base_get_remote_path(char **remote_ref, orte_process_name_t *peer, int *flag); /** * Setup request structure */ ORTE_DECLSPEC int orte_filem_base_prepare_request(orte_filem_base_request_t *request, int move_type); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif /* ORTE_FILEM_BASE_H */ diff --git a/orte/mca/filem/base/filem_base_close.c b/orte/mca/filem/base/filem_base_close.c index 56c943536d..5ee09b1f1f 100644 --- a/orte/mca/filem/base/filem_base_close.c +++ b/orte/mca/filem/base/filem_base_close.c @@ -15,8 +15,8 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" diff --git a/orte/mca/filem/base/filem_base_fns.c b/orte/mca/filem/base/filem_base_fns.c index 99e7a36cf7..e2ae1a4e14 100644 --- a/orte/mca/filem/base/filem_base_fns.c +++ b/orte/mca/filem/base/filem_base_fns.c @@ -24,7 +24,7 @@ #endif #include -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" @@ -34,9 +34,10 @@ #include "opal/mca/base/mca_base_param.h" #include "opal/util/os_dirpath.h" -#include "orte/mca/gpr/gpr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" #include "orte/mca/filem/filem.h" #include "orte/mca/filem/base/base.h" @@ -94,7 +95,7 @@ ORTE_DECLSPEC OBJ_CLASS_INSTANCE(orte_filem_base_request_t, orte_filem_base_construct, orte_filem_base_destruct); -ORTE_DECLSPEC void orte_filem_base_construct(orte_filem_base_request_t *req) { +void 
orte_filem_base_construct(orte_filem_base_request_t *req) { OBJ_CONSTRUCT(&req->process_sets, opal_list_t); OBJ_CONSTRUCT(&req->file_sets, opal_list_t); @@ -108,7 +109,7 @@ ORTE_DECLSPEC void orte_filem_base_construct(orte_filem_base_request_t *req) { req->movement_type = ORTE_FILEM_MOVE_TYPE_UNKNOWN; } -ORTE_DECLSPEC void orte_filem_base_destruct( orte_filem_base_request_t *req) { +void orte_filem_base_destruct( orte_filem_base_request_t *req) { opal_list_item_t* item = NULL; while( NULL != (item = opal_list_remove_first(&req->process_sets)) ) { @@ -207,155 +208,114 @@ int orte_filem_base_none_wait_all(opal_list_t *request_list) /******************** * Utility functions ********************/ -int orte_filem_base_listener_init(orte_rml_buffer_callback_fn_t rml_cbfunc) { - int ret; - - if( ORTE_SUCCESS != (ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_FILEM, - 0, - rml_cbfunc, - NULL)) ) { - return ret; - } - - return ORTE_SUCCESS; -} - -int orte_filem_base_listener_cancel() { - int ret; - - if( ORTE_SUCCESS != (ret = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_FILEM) ) ) { - return ret; - } - - return ORTE_SUCCESS; -} - int orte_filem_base_get_proc_node_name(orte_process_name_t *proc, char **machine_name) { - int ret, exit_status = ORTE_SUCCESS; - char *segment = NULL, **tokens, *keys[2]; - orte_gpr_value_t** values = NULL; - orte_std_cntr_t num_tokens = 0, num_values = 0, i, j; - char *tmp_node_name = NULL; - - /* - * Contact GPR and get the 'orte-node-name' for this process - */ - keys[0] = ORTE_NODE_NAME_KEY; - keys[1] = NULL; + int ret; + orte_std_cntr_t count; + opal_buffer_t request, answer; + orte_filem_cmd_flag_t command=ORTE_FILEM_GET_PROC_NODE_NAME_CMD; - /* - * Get the job segment - */ - if(ORTE_SUCCESS != (ret = orte_schema.get_job_segment_name(&segment, proc->jobid))) { - exit_status = ret; - goto cleanup; - } + /* set default answer */ + *machine_name = NULL; - /* - * Get the process tokens - */ - if (ORTE_SUCCESS != (ret 
= orte_schema.get_proc_tokens(&tokens, - &num_tokens, - proc) )) { - exit_status = ret; - goto cleanup; - } + if (orte_process_info.hnp) { + /* if I am the HNP, then all the data structures are local to me - no + * need to send messages around to get the info + */ + orte_job_t *jdata; + orte_proc_t **procs; - /* - * Get the requested values - */ - if( ORTE_SUCCESS != (ret = orte_gpr.get(ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR, - segment, - tokens, - keys, - &num_values, - &values ) ) ) { - - exit_status = ret; - goto cleanup; - } - - /* - * Parse the values - */ - for(i = 0; i < num_values; ++i) { - orte_gpr_value_t* value = values[i]; - - for(j = 0; j < value->cnt; ++j) { - orte_gpr_keyval_t* keyval = value->keyvals[j]; - - if (strcmp(keyval->key, keys[0]) == 0) { - if (ORTE_SUCCESS != (ret = orte_dss.get((void**)&(tmp_node_name), keyval->value, ORTE_STRING))) { - exit_status = ret; - goto cleanup; - } - *machine_name = strdup(tmp_node_name); - if(NULL != tmp_node_name) { - free(tmp_node_name); - tmp_node_name = NULL; - } - continue; - } + /* get the job data object for this proc */ + if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; } + /* get the proc object for it */ + procs = (orte_proc_t**)jdata->procs->addr; + if (NULL == procs[proc->vpid] || NULL == procs[proc->vpid]->node) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + *machine_name = strdup(procs[proc->vpid]->node->name); + return ORTE_SUCCESS; + } + + /* if I am not the HNP, then I have to send a request to the HNP + * for the information + */ + OBJ_CONSTRUCT(&request, opal_buffer_t); + OBJ_CONSTRUCT(&answer, opal_buffer_t); + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&request, &command, 1, ORTE_FILEM_CMD))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + if (ORTE_SUCCESS != (ret = opal_dss.pack(&request, proc, 1, ORTE_NAME))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + + if (0 > (ret = 
orte_rml.send_buffer(ORTE_PROC_MY_HNP, &request, ORTE_RML_TAG_FILEM_BASE, 0))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; } - if (NULL == *machine_name ){ - exit_status = ORTE_ERROR; - goto cleanup; + /* wait for answer */ + if (0 > (ret = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &answer, ORTE_RML_TAG_FILEM_BASE_RESP, 0))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + + /* unpack the machine name */ + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(&answer, machine_name, &count, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; } - cleanup: - if( NULL != segment) - free(segment); + CLEANUP: + OBJ_DESTRUCT(&answer); + OBJ_DESTRUCT(&request); - if(NULL != tmp_node_name) { - free(tmp_node_name); - tmp_node_name = NULL; - } - - return exit_status; + return ret; } /* - * This function is paired with the orte_filem_base_query_callback() function on the remote machine + * This function is paired with the filem_base_process_get_remote_path_cmd() function on the remote machine */ -int orte_filem_base_query_remote_path(char **remote_ref, orte_process_name_t *peer, int *flag) { +int orte_filem_base_get_remote_path(char **remote_ref, orte_process_name_t *peer, int *flag) { int ret, exit_status = ORTE_SUCCESS; char *tmp_ref = NULL; orte_std_cntr_t n; - orte_buffer_t *loc_buffer = NULL; + opal_buffer_t request, answer; int tmp_flag; + orte_filem_cmd_flag_t command=ORTE_FILEM_GET_REMOTE_PATH_CMD; /* * Ask for remote file information from the HNP */ - if( NULL == (loc_buffer = OBJ_NEW(orte_buffer_t) ) ) { - exit_status = ORTE_ERR_OUT_OF_RESOURCE; + OBJ_CONSTRUCT(&request, opal_buffer_t); + OBJ_CONSTRUCT(&answer, opal_buffer_t); + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&request, &command, 1, ORTE_FILEM_CMD))) { + ORTE_ERROR_LOG(ret); goto cleanup; } - if (ORTE_SUCCESS != (ret = orte_dss.pack(loc_buffer, remote_ref, 1, ORTE_STRING))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&request, remote_ref, 1, OPAL_STRING))) { exit_status = ret; goto cleanup; } - if (0 > 
(ret = orte_rml.send_buffer(peer, loc_buffer, ORTE_RML_TAG_FILEM, 0))) { + if (0 > (ret = orte_rml.send_buffer(peer, &request, ORTE_RML_TAG_FILEM_BASE, 0))) { exit_status = ret; goto cleanup; } - OBJ_RELEASE(loc_buffer); - if( NULL == (loc_buffer = OBJ_NEW(orte_buffer_t) ) ) { - exit_status = ORTE_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - /* * Get the response */ - if( 0 > (ret = orte_rml.recv_buffer(peer, loc_buffer, ORTE_RML_TAG_FILEM, 0)) ) { + if( 0 > (ret = orte_rml.recv_buffer(peer, &answer, ORTE_RML_TAG_FILEM_BASE_RESP, 0)) ) { exit_status = ret; goto cleanup; } @@ -364,7 +324,7 @@ int orte_filem_base_query_remote_path(char **remote_ref, orte_process_name_t *pe * The absolute path for the remote file */ n = 1; - if ( ORTE_SUCCESS != (ret = orte_dss.unpack(loc_buffer, &tmp_ref, &n, ORTE_STRING)) ) { + if ( ORTE_SUCCESS != (ret = opal_dss.unpack(&answer, &tmp_ref, &n, OPAL_STRING)) ) { exit_status = ret; goto cleanup; } @@ -373,7 +333,7 @@ int orte_filem_base_query_remote_path(char **remote_ref, orte_process_name_t *pe * The file type on the remote machine */ n = 1; - if ( ORTE_SUCCESS != (ret = orte_dss.unpack(loc_buffer, &tmp_flag, &n, ORTE_INT)) ) { + if ( ORTE_SUCCESS != (ret = opal_dss.unpack(&answer, &tmp_flag, &n, OPAL_INT)) ) { exit_status = ret; goto cleanup; } @@ -385,119 +345,15 @@ int orte_filem_base_query_remote_path(char **remote_ref, orte_process_name_t *pe *flag = tmp_flag; cleanup: - if( NULL != loc_buffer) - OBJ_RELEASE(loc_buffer); + OBJ_DESTRUCT(&answer); + OBJ_DESTRUCT(&request); + if( NULL != tmp_ref) free(tmp_ref); return exit_status; } -/* - * This function is paired with the orte_filem_base_query_remote_path() function on the - * requesting machine. - * This function is responsible for: - * - Constructing the remote absolute path for the specified file/dir - * - Verify the existence of the file/dir - * - Determine if the specified file/dir is in fact a file or dir or unknown if not found. 
- * - */ -void orte_filem_base_query_callback(int status, - orte_process_name_t* peer, - orte_buffer_t *buffer, - orte_rml_tag_t tag, - void* cbdata) { - int ret, exit_status = ORTE_SUCCESS; - orte_std_cntr_t n; - orte_buffer_t loc_buffer; - char *filename = NULL; - char *tmp_name = NULL; - char cwd[OMPI_PATH_MAX]; - int file_type = ORTE_FILEM_TYPE_UNKNOWN; - struct stat file_status; - - /* - * Receive the file/dir name in question - */ - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &filename, &n, ORTE_STRING))) { - exit_status = ret; - goto cleanup; - } - - OBJ_CONSTRUCT(&loc_buffer, orte_buffer_t); - - /* - * Determine the absolute path of the file - */ - if(filename[0] != '/') { /* if it is not an absolute path already */ - getcwd(cwd, sizeof(cwd)); - asprintf(&tmp_name, "%s/%s", cwd, filename); - } - else { - tmp_name = strdup(filename); - } - - opal_output_verbose(10, orte_filem_base_output, - "filem:base: filem_base_query_callback: %s -> %s: Filename Requested (%s) translated to (%s)", - ORTE_NAME_PRINT(orte_process_info.my_name), - ORTE_NAME_PRINT(peer), - filename, tmp_name); - - /* - * Determine if the file/dir exists at that absolute path - * Determine if the file/dir is a file or a directory - */ - if(0 != (ret = stat(tmp_name, &file_status) ) ){ - file_type = ORTE_FILEM_TYPE_UNKNOWN; - } - else { - /* Is it a directory? 
*/ - if(S_ISDIR(file_status.st_mode)) { - file_type = ORTE_FILEM_TYPE_DIR; - } - else if(S_ISREG(file_status.st_mode)) { - file_type = ORTE_FILEM_TYPE_FILE; - } - } - - /* - * Send back the Absolute Path - */ - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.pack(&loc_buffer, &tmp_name, n, ORTE_STRING))) { - exit_status = ret; - goto cleanup; - } - - /* - * Send back the reference type - * - ORTE_FILEM_TYPE_FILE = File - * - ORTE_FILEM_TYPE_DIR = Directory - * - ORTE_FILEM_TYPE_UNKNOWN = Could not be determined, or does not exist - */ - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.pack(&loc_buffer, &file_type, n, ORTE_INT))) { - exit_status = ret; - goto cleanup; - } - - if (0 > (ret = orte_rml.send_buffer(peer, &loc_buffer, ORTE_RML_TAG_FILEM, 0))) { - exit_status = ret; - goto cleanup; - } - - cleanup: - OBJ_DESTRUCT(&loc_buffer); - - if( NULL != filename) - free(filename); - if( NULL != tmp_name) - free(tmp_name); - - return; -} - int orte_filem_base_prepare_request(orte_filem_base_request_t *request, int move_type) { int num_reqs = 0, i = 0; diff --git a/orte/mca/filem/base/filem_base_open.c b/orte/mca/filem/base/filem_base_open.c index 0802c4285d..99ffd9bd80 100644 --- a/orte/mca/filem/base/filem_base_open.c +++ b/orte/mca/filem/base/filem_base_open.c @@ -16,7 +16,7 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" diff --git a/orte/mca/filem/base/filem_base_receive.c b/orte/mca/filem/base/filem_base_receive.c new file mode 100644 index 0000000000..b3aaee595c --- /dev/null +++ b/orte/mca/filem/base/filem_base_receive.c @@ -0,0 +1,309 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +/* + * includes + */ +#include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" + +#if HAVE_SYS_TYPES_H +#include +#endif +#if HAVE_SYS_STAT_H +#include +#endif +#if HAVE_UNISTD_H +#include +#endif + +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_param.h" + +#include "opal/dss/dss.h" +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_wakeup.h" + +#include "orte/mca/filem/filem.h" +#include "orte/mca/filem/base/base.h" + +/* + * Functions to process some FileM specific commands + */ +static void filem_base_process_get_proc_node_name_cmd(orte_process_name_t* sender, + opal_buffer_t* buffer); +static void filem_base_process_get_remote_path_cmd(orte_process_name_t* sender, + opal_buffer_t* buffer); + +static bool recv_issued=false; + +int orte_filem_base_comm_start(void) +{ + int rc; + + if (recv_issued && orte_process_info.hnp) { + return ORTE_SUCCESS; + } + + OPAL_OUTPUT_VERBOSE((5, orte_filem_base_output, + "%s filem:base: Receive: Start command recv", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_FILEM_BASE, + ORTE_RML_PERSISTENT, + orte_filem_base_recv, + NULL))) { + ORTE_ERROR_LOG(rc); + } + + recv_issued = true; + + return rc; +} + + +int orte_filem_base_comm_stop(void) +{ + int rc; + + if (!recv_issued && orte_process_info.hnp) { + return ORTE_SUCCESS; + } + + OPAL_OUTPUT_VERBOSE((5, orte_filem_base_output, + "%s filem:base:receive stop comm", + 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_FILEM_BASE))) { + ORTE_ERROR_LOG(rc); + } + recv_issued = false; + + return rc; +} + + +/* + * handle message from proxies + * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. + * DO NOT RELEASE THIS BUFFER IN THIS CODE + */ +void orte_filem_base_recv(int status, orte_process_name_t* sender, + opal_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) +{ + orte_filem_cmd_flag_t command; + orte_std_cntr_t count; + int rc; + + OPAL_OUTPUT_VERBOSE((5, orte_filem_base_output, + "%s filem:base: Receive a command message.", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + count = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &command, &count, ORTE_FILEM_CMD))) { + ORTE_ERROR_LOG(rc); + return; + } + + switch (command) { + case ORTE_FILEM_GET_PROC_NODE_NAME_CMD: + OPAL_OUTPUT_VERBOSE((10, orte_filem_base_output, + "%s filem:base: Command: Get Proc node name command", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + filem_base_process_get_proc_node_name_cmd(sender, buffer); + break; + + case ORTE_FILEM_GET_REMOTE_PATH_CMD: + OPAL_OUTPUT_VERBOSE((10, orte_filem_base_output, + "%s filem:base: Command: Get remote path command", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + filem_base_process_get_remote_path_cmd(sender, buffer); + break; + + default: + ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); + } +} + +static void filem_base_process_get_proc_node_name_cmd(orte_process_name_t* sender, + opal_buffer_t* buffer) +{ + opal_buffer_t answer; + orte_std_cntr_t count; + orte_job_t *jdata = NULL; + orte_proc_t **procs = NULL; + orte_process_name_t name; + int rc; + + OBJ_CONSTRUCT(&answer, opal_buffer_t); + + /* + * Unpack the data + */ + count = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &name, &count, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* + * Process the data + */ + /* get the job data object for 
this proc */ + if (NULL == (jdata = orte_get_job_data_object(name.jobid))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + orte_wakeup(1); + goto CLEANUP; + } + /* get the proc object for it */ + procs = (orte_proc_t**)jdata->procs->addr; + if (NULL == procs[name.vpid] || NULL == procs[name.vpid]->node) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + orte_wakeup(1); + goto CLEANUP; + } + + /* + * Send back the answer + */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&answer, &(procs[name.vpid]->node->name), 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + orte_wakeup(1); + goto CLEANUP; + } + + if (0 > (rc = orte_rml.send_buffer(sender, &answer, ORTE_RML_TAG_FILEM_BASE_RESP, 0))) { + ORTE_ERROR_LOG(rc); + } + + CLEANUP: + OBJ_DESTRUCT(&answer); + +} + +/* + * This function is responsible for: + * - Constructing the remote absolute path for the specified file/dir + * - Verify the existence of the file/dir + * - Determine if the specified file/dir is in fact a file or dir or unknown if not found. + */ +static void filem_base_process_get_remote_path_cmd(orte_process_name_t* sender, + opal_buffer_t* buffer) +{ + opal_buffer_t answer; + orte_std_cntr_t count; + char *filename = NULL; + char *tmp_name = NULL; + char cwd[OMPI_PATH_MAX]; + int file_type = ORTE_FILEM_TYPE_UNKNOWN; + struct stat file_status; + int rc; + + /* + * Unpack the data + */ + OBJ_CONSTRUCT(&answer, opal_buffer_t); + + count = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &filename, &count, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* + * Determine the absolute path of the file + */ + if(filename[0] != '/') { /* if it is not an absolute path already */ + getcwd(cwd, sizeof(cwd)); + asprintf(&tmp_name, "%s/%s", cwd, filename); + } + else { + tmp_name = strdup(filename); + } + + opal_output_verbose(10, orte_filem_base_output, + "filem:base: process_get_remote_path_cmd: %s -> %s: Filename Requested (%s) translated to (%s)", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(sender), + 
filename, tmp_name); + + /* + * Determine if the file/dir exists at that absolute path + * Determine if the file/dir is a file or a directory + */ + if(0 != (rc = stat(tmp_name, &file_status) ) ){ + file_type = ORTE_FILEM_TYPE_UNKNOWN; + } + else { + /* Is it a directory? */ + if(S_ISDIR(file_status.st_mode)) { + file_type = ORTE_FILEM_TYPE_DIR; + } + else if(S_ISREG(file_status.st_mode)) { + file_type = ORTE_FILEM_TYPE_FILE; + } + } + + /* + * Pack up the response + * Send back the reference type + * - ORTE_FILEM_TYPE_FILE = File + * - ORTE_FILEM_TYPE_DIR = Directory + * - ORTE_FILEM_TYPE_UNKNOWN = Could not be determined, or does not exist + */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&answer, &tmp_name, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + orte_wakeup(1); + goto CLEANUP; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(&answer, &file_type, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + orte_wakeup(1); + goto CLEANUP; + } + + if (0 > (rc = orte_rml.send_buffer(sender, &answer, ORTE_RML_TAG_FILEM_BASE_RESP, 0))) { + ORTE_ERROR_LOG(rc); + } + + CLEANUP: + if( NULL != filename) { + free(filename); + filename = NULL; + } + if( NULL != tmp_name) { + free(tmp_name); + tmp_name = NULL; + } + + OBJ_DESTRUCT(&answer); +} diff --git a/orte/mca/filem/base/filem_base_select.c b/orte/mca/filem/base/filem_base_select.c index 69b0c89ea4..c11b3c5417 100644 --- a/orte/mca/filem/base/filem_base_select.c +++ b/orte/mca/filem/base/filem_base_select.c @@ -15,8 +15,8 @@ */ #include "orte_config.h" +#include "orte/constants.h" -#include "orte/orte_constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" diff --git a/orte/mca/filem/filem.h b/orte/mca/filem/filem.h index 82627eb610..30fedb7b89 100644 --- a/orte/mca/filem/filem.h +++ b/orte/mca/filem/filem.h @@ -26,12 +26,11 @@ #define MCA_FILEM_H #include "orte_config.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" +#include "orte/constants.h" +#include "orte/types.h" #include "opal/mca/mca.h" #include 
"opal/mca/base/base.h" -#include "orte/mca/ns/ns.h" #include "opal/class/opal_object.h" diff --git a/orte/mca/filem/rsh/filem_rsh_component.c b/orte/mca/filem/rsh/filem_rsh_component.c index 1773f1c5b2..8adc24c10b 100644 --- a/orte/mca/filem/rsh/filem_rsh_component.c +++ b/orte/mca/filem/rsh/filem_rsh_component.c @@ -15,9 +15,10 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include "opal/util/output.h" -#include "orte/orte_constants.h" + #include "orte/mca/filem/filem.h" #include "orte/mca/filem/base/base.h" #include "filem_rsh.h" diff --git a/orte/mca/filem/rsh/filem_rsh_module.c b/orte/mca/filem/rsh/filem_rsh_module.c index 4134dde538..193f9ff5c0 100644 --- a/orte/mca/filem/rsh/filem_rsh_module.c +++ b/orte/mca/filem/rsh/filem_rsh_module.c @@ -19,7 +19,7 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include #include @@ -45,7 +45,9 @@ #include "opal/threads/threads.h" #include "opal/threads/condition.h" -#include "orte/mca/gpr/gpr.h" +#include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" +#include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/filem/filem.h" @@ -71,11 +73,6 @@ static int start_child(char * command, static int orte_filem_rsh_query_remote_path(char **remote_ref, orte_process_name_t *proc, int *flag); -static void orte_filem_rsh_query_callback(int status, - orte_process_name_t* sender, - orte_buffer_t *buffer, - orte_rml_tag_t tag, - void* cbdata); static void filem_rsh_waitpid_cb(pid_t pid, int status, void* cbdata); @@ -84,7 +81,7 @@ static int orte_filem_rsh_permission_listener_init(orte_rml_buffer_callback_fn_t static int orte_filem_rsh_permission_listener_cancel(void); static void orte_filem_rsh_permission_callback(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata); static int orte_filem_rsh_permission_ask(orte_process_name_t* sender, int num_sends); @@ -209,8 
+206,8 @@ static orte_filem_base_module_t loc_module = { orte_filem_base_module_1_0_0_t * orte_filem_rsh_component_query(int *priority) { - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: component_query()"); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: component_query()")); *priority = mca_filem_rsh_component.super.priority; @@ -221,8 +218,8 @@ int orte_filem_rsh_module_init(void) { int ret; - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: module_init()"); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: module_init()")); /* * Allocate the work pools @@ -236,15 +233,6 @@ int orte_filem_rsh_module_init(void) work_pool_all_done = false; - /* - * Start the listener for path resolution - */ - if( ORTE_SUCCESS != (ret = orte_filem_base_listener_init(orte_filem_rsh_query_callback) ) ) { - opal_output(mca_filem_rsh_component.super.output_handle, - "filem:rsh:init Failed to start listener\n"); - return ret; - } - /* * Start the listener for permission */ @@ -254,6 +242,12 @@ int orte_filem_rsh_module_init(void) return ret; } + /* start the base receive */ + if (ORTE_SUCCESS != (ret = orte_filem_base_comm_start())) { + opal_output(mca_filem_rsh_component.super.output_handle, + "filem:rsh:init Failed to start base receive\n"); + return ret; + } return ORTE_SUCCESS; } @@ -261,8 +255,8 @@ int orte_filem_rsh_module_finalize(void) { opal_list_item_t *item = NULL; - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: module_finalize()"); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: module_finalize()")); /* * Make sure all active requests are completed @@ -274,8 +268,12 @@ int orte_filem_rsh_module_finalize(void) /* * Stop the listeners */ - orte_filem_base_listener_cancel(); orte_filem_rsh_permission_listener_cancel(); + + /* + * Stop the base receive + */ + 
orte_filem_base_comm_stop(); /* * Deallocate the work pools @@ -492,8 +490,8 @@ int orte_filem_rsh_wait(orte_filem_base_request_t *request) continue; } - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: wait(): Transfer complete. Cleanup\n"); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: wait(): Transfer complete. Cleanup\n")); opal_list_remove_item(&work_pool_active, item); @@ -550,6 +548,8 @@ int orte_filem_rsh_wait_all(opal_list_t * request_list) orte_filem_base_request_t *request = (orte_filem_base_request_t *) item; if( ORTE_SUCCESS != (ret = orte_filem_rsh_wait(request)) ) { + opal_output(mca_filem_rsh_component.super.output_handle, + "filem:rsh: wait_all(): Wait failed (%d)", ret); exit_status = ret; goto cleanup; } @@ -584,31 +584,64 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) { p_item = opal_list_get_next( p_item) ) { orte_filem_base_process_set_t * p_set = (orte_filem_base_process_set_t*)p_item; - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: copy(): %s -> %s Moving file %s to %s\n", - ORTE_NAME_PRINT(&p_set->source), - ORTE_NAME_PRINT(&p_set->sink), - f_set->local_target, - f_set->remote_target); + if( request->movement_type == ORTE_FILEM_MOVE_TYPE_PUT ) { + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: copy(): %s -> %s: Moving file %s to %s\n", + ORTE_NAME_PRINT(&p_set->source), + ORTE_NAME_PRINT(&p_set->sink), + f_set->local_target, + f_set->remote_target)); + } else { + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: copy(): %s -> %s: Moving file %s to %s\n", + ORTE_NAME_PRINT(&p_set->source), + ORTE_NAME_PRINT(&p_set->sink), + f_set->remote_target, + f_set->local_target)); + } /* * Get the remote machine identifier from the process_name struct */ + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: 
copy(): %s -> %s: Get node name.\n", + ORTE_NAME_PRINT(&p_set->source), + ORTE_NAME_PRINT(&p_set->sink))); if( ORTE_SUCCESS != (ret = orte_filem_base_get_proc_node_name(&p_set->source, &remote_machine))) { + opal_output(mca_filem_rsh_component.super.output_handle, + "filem:rsh: copy(): Get Node Name failed (%d)", ret); exit_status = ret; goto cleanup; } + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: copy(): %s -> %s: Got node name: %s\n", + ORTE_NAME_PRINT(&p_set->source), + ORTE_NAME_PRINT(&p_set->sink), + remote_machine)); /* * Fix the remote_filename. * If it is an absolute path, then assume it is valid for the remote server * ow then we must construct the correct path. */ + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: copy(): %s -> %s: Query remote path (%s).\n", + ORTE_NAME_PRINT(&p_set->source), + ORTE_NAME_PRINT(&p_set->sink), + f_set->remote_target)); remote_file = strdup(f_set->remote_target); if( ORTE_SUCCESS != (ret = orte_filem_rsh_query_remote_path(&remote_file, &p_set->source, &f_set->target_flag) ) ) { + opal_output(mca_filem_rsh_component.super.output_handle, + "filem:rsh: copy(): Query Remote Path failed (%d)", ret); exit_status = ret; goto cleanup; } + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: copy(): %s -> %s: Remote path (%s) is (%s).\n", + ORTE_NAME_PRINT(&p_set->source), + ORTE_NAME_PRINT(&p_set->sink), + f_set->remote_target, + remote_file)); /* * Transfer the file or directory @@ -617,6 +650,8 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) { dir_arg = strdup(" -r "); } else if(ORTE_FILEM_TYPE_UNKNOWN == f_set->target_flag) { + opal_output(mca_filem_rsh_component.super.output_handle, + "filem:rsh: copy(): Error: File type unknown"); goto continue_set; } else { @@ -633,8 +668,8 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) { f_set->local_target, remote_machine, 
remote_file); - opal_output_verbose(17, mca_filem_rsh_component.super.output_handle, - "filem:rsh:put about to execute [%s]", command); + OPAL_OUTPUT_VERBOSE((17, mca_filem_rsh_component.super.output_handle, + "filem:rsh:put about to execute [%s]", command)); if( ORTE_SUCCESS != (ret = orte_filem_rsh_start_command(p_set, f_set, @@ -656,8 +691,8 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) { remote_file, f_set->local_target); - opal_output_verbose(17, mca_filem_rsh_component.super.output_handle, - "filem:rsh:get about to execute [%s]", command); + OPAL_OUTPUT_VERBOSE((17, mca_filem_rsh_component.super.output_handle, + "filem:rsh:get about to execute [%s]", command)); if( ORTE_SUCCESS != (ret = orte_filem_rsh_start_command(p_set, f_set, @@ -777,8 +812,8 @@ static int orte_filem_rsh_start_rm(orte_filem_base_request_t *request) dir_arg, remote_targets); - opal_output_verbose(15, mca_filem_rsh_component.super.output_handle, - "filem:rsh:rm about to execute [%s]", command); + OPAL_OUTPUT_VERBOSE((15, mca_filem_rsh_component.super.output_handle, + "filem:rsh:rm about to execute [%s]", command)); if( ORTE_SUCCESS != (ret = orte_filem_rsh_start_command(p_set, NULL, @@ -873,9 +908,9 @@ static int orte_filem_rsh_start_command(orte_filem_base_process_set_t *proc_set /* * Ask for permission to send this file so we do not overwhelm the peer */ - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: start_command(): Ask permission to send from proc %s", - ORTE_NAME_PRINT(&(proc_set->source))); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: start_command(): Ask permission to send from proc %s", + ORTE_NAME_PRINT(&(proc_set->source)))); if( ORTE_SUCCESS != (ret = orte_filem_rsh_permission_ask(&(proc_set->source), 1)) ) { return ret; } @@ -893,9 +928,9 @@ static int start_child(char * command, char **argv = NULL; int status, ret; - opal_output_verbose(10, 
mca_filem_rsh_component.super.output_handle, - "filem:rsh: start_child(): Starting the command [%s]", - command); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: start_child(): Starting the command [%s]", + command)); /* fork() -> done = false, active = true */ request->is_done[index] = false; request->is_active[index] = true; @@ -913,9 +948,9 @@ static int start_child(char * command, exit(ORTE_ERROR); } else if( request->exit_status[index] > 0 ) { - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: start_child(): Started Child %d Running command [%s]", - request->exit_status[index], command); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: start_child(): Started Child %d Running command [%s]", + request->exit_status[index], command)); /* * Register a callback for when this process exits @@ -941,9 +976,9 @@ static void filem_rsh_waitpid_cb(pid_t pid, int status, void* cbdata) opal_list_item_t *item = NULL; int index; - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: waitpid_cb(): Pid %d finished with status [%d].\n", - pid, status); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: waitpid_cb(): Pid %d finished with status [%d].\n", + pid, status)); /* * Find this pid in the active queue @@ -961,9 +996,9 @@ static void filem_rsh_waitpid_cb(pid_t pid, int status, void* cbdata) /* waitpid() -> done = true, active = false */ request->is_done[index] = true; request->is_active[index] = false; - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: waitpid_cb(): Marked pid %d as complete [status = %d].\n", - pid, status); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: waitpid_cb(): Marked pid %d as complete [status = %d].\n", + pid, status)); break; } } @@ -979,62 +1014,30 @@ static void filem_rsh_waitpid_cb(pid_t pid, int 
status, void* cbdata) * Path resolution functions ******************************/ /* - * This function is paired with the orte_filem_rsh_query_callback() function on the remote machine + * This function is paired with the filem_base_process_get_remote_path_cmd() function on the remote machine */ static int orte_filem_rsh_query_remote_path(char **remote_ref, orte_process_name_t *peer, int *flag) { int ret; -#if 1 /* JJH: Some debugging */ - /* If it is an absolute path */ +#if 0 + /* An optimization if we are guarenteed that this remote files exists. + * Then the 'scp -r' option will work with both files and directories. + * JJH: For general correctness disable this piece of code. + */ if( *remote_ref[0] == '/' ) { *flag = ORTE_FILEM_TYPE_DIR; return ORTE_SUCCESS; } #endif - /* Put our listener on hold */ - orte_filem_base_listener_cancel(); - /* Call the base function */ - if( ORTE_SUCCESS != (ret = orte_filem_base_query_remote_path(remote_ref, peer, flag) ) ) { - return ret; - } - - /* Reset the listener */ - if( ORTE_SUCCESS != (ret = orte_filem_base_listener_init(orte_filem_rsh_query_callback) ) ) { + if( ORTE_SUCCESS != (ret = orte_filem_base_get_remote_path(remote_ref, peer, flag) ) ) { return ret; } return ORTE_SUCCESS; } -/* - * This function is paired with the orte_filem_rsh_query_remote_path() function on the - * requesting machine. - * This function is responsible for: - * - Constructing the remote absolute path for the specified file/dir - * - Verify the existence of the file/dir - * - Determine if the specified file/dir is in fact a file or dir or unknown if not found. 
- * - */ -static void orte_filem_rsh_query_callback(int status, - orte_process_name_t* peer, - orte_buffer_t *buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - opal_output_verbose(15, mca_filem_rsh_component.super.output_handle, - "filem:rsh: query_callback(%s -> %s)", - ORTE_NAME_PRINT(orte_process_info.my_name), - ORTE_NAME_PRINT(peer)); - - /* Call the base callback function */ - orte_filem_base_query_callback(status, peer, buffer, tag, cbdata); - - /* Reset the listener */ - orte_filem_base_listener_init(orte_filem_rsh_query_callback); -} - /****************************** * Permission functions ******************************/ @@ -1074,7 +1077,7 @@ static int orte_filem_rsh_permission_listener_cancel(void) static void orte_filem_rsh_permission_callback(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata) { @@ -1085,9 +1088,9 @@ static void orte_filem_rsh_permission_callback(int status, int num_req, num_allowed = 0; int perm_flag, i; - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: permission_callback(? ?): Peer %s ...", - ORTE_NAME_PRINT(sender)); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: permission_callback(? 
?): Peer %s ...", + ORTE_NAME_PRINT(sender))); /* * Receive the flag indicating if this is: @@ -1095,23 +1098,23 @@ static void orte_filem_rsh_permission_callback(int status, * - Allowing us to send (ORTE_FILEM_RSH_ALLOW) */ n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &perm_flag, &n, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &perm_flag, &n, OPAL_INT))) { goto cleanup; } /* Asking for permission to send */ if( ORTE_FILEM_RSH_ASK == perm_flag ) { - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: permission_callback(ASK): Peer %s Asking permission to send [Used %d of %d]", - ORTE_NAME_PRINT(sender), - cur_num_incomming, - orte_filem_rsh_max_incomming); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: permission_callback(ASK): Peer %s Asking permission to send [Used %d of %d]", + ORTE_NAME_PRINT(sender), + cur_num_incomming, + orte_filem_rsh_max_incomming)); /* * Receive the requested amount */ n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &num_req, &n, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &num_req, &n, OPAL_INT))) { goto cleanup; } @@ -1123,9 +1126,9 @@ static void orte_filem_rsh_permission_callback(int status, */ if( orte_filem_rsh_max_incomming < cur_num_incomming + 1) { /* Add to the waiting list */ - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: permission_callback(ASK): Add Peer %s request to waiting list", - ORTE_NAME_PRINT(sender)); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: permission_callback(ASK): Add Peer %s request to waiting list", + ORTE_NAME_PRINT(sender))); wp_item = OBJ_NEW(orte_filem_rsh_work_pool_item_t); wp_item->proc_set.source.jobid = sender->jobid; @@ -1138,24 +1141,24 @@ static void orte_filem_rsh_permission_callback(int status, num_allowed = 1; cur_num_incomming += 1; - opal_output_verbose(10, 
mca_filem_rsh_component.super.output_handle, - "filem:rsh: permission_callback(ASK): Respond to Peer %s with %d", - ORTE_NAME_PRINT(sender), num_allowed); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: permission_callback(ASK): Respond to Peer %s with %d", + ORTE_NAME_PRINT(sender), num_allowed)); permission_send_num_allowed(sender, num_allowed); } } /* Allowing us to start some number of sends */ else if( ORTE_FILEM_RSH_ALLOW == perm_flag ) { - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: permission_callback(ALLOW): Peer %s Allowing me to send", - ORTE_NAME_PRINT(sender)); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: permission_callback(ALLOW): Peer %s Allowing me to send", + ORTE_NAME_PRINT(sender))); /* * Receive the allowed transmit amount */ n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &num_req, &n, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &num_req, &n, OPAL_INT))) { goto cleanup; } @@ -1166,9 +1169,9 @@ static void orte_filem_rsh_permission_callback(int status, */ for(i = 0; i < num_req; ++i ) { if( 0 >= opal_list_get_size(&work_pool_pending) ) { - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: permission_callback(ALLOW): No more pending sends to peer %s...", - ORTE_NAME_PRINT(sender)); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: permission_callback(ALLOW): No more pending sends to peer %s...", + ORTE_NAME_PRINT(sender))); break; } @@ -1184,13 +1187,13 @@ static void orte_filem_rsh_permission_callback(int status, } if( item == opal_list_get_end(&work_pool_pending) ) { - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: permission_callback(ALLOW): Unable to find message on the pending list\n"); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: 
permission_callback(ALLOW): Unable to find message on the pending list\n")); } - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: permission_callback(ALLOW): Starting to send to peer %s... (# pending = %d)", - ORTE_NAME_PRINT(sender), (int)opal_list_get_size(&work_pool_pending)); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: permission_callback(ALLOW): Starting to send to peer %s... (# pending = %d)", + ORTE_NAME_PRINT(sender), (int)opal_list_get_size(&work_pool_pending))); wp_item->active = true; opal_list_append(&work_pool_active, &(wp_item->super)); if( ORTE_SUCCESS != (ret = start_child(wp_item->command, @@ -1202,15 +1205,15 @@ static void orte_filem_rsh_permission_callback(int status, } /* Peer said they are done sending one or more files */ else if( ORTE_FILEM_RSH_DONE == perm_flag ) { - opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: permission_callback(DONE): Peer %s is done sending to me", - ORTE_NAME_PRINT(sender)); + OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, + "filem:rsh: permission_callback(DONE): Peer %s is done sending to me", + ORTE_NAME_PRINT(sender))); /* * Receive the number of open slots */ n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &num_req, &n, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &num_req, &n, OPAL_INT))) { goto cleanup; } @@ -1243,17 +1246,17 @@ static int orte_filem_rsh_permission_ask(orte_process_name_t* source, int num_sends) { int ret, exit_status = ORTE_SUCCESS; - orte_buffer_t loc_buffer; + opal_buffer_t loc_buffer; int perm_flag = ORTE_FILEM_RSH_ASK; - OBJ_CONSTRUCT(&loc_buffer, orte_buffer_t); + OBJ_CONSTRUCT(&loc_buffer, opal_buffer_t); - if (ORTE_SUCCESS != (ret = orte_dss.pack(&loc_buffer, &perm_flag, 1, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&loc_buffer, &perm_flag, 1, OPAL_INT))) { exit_status = ret; goto cleanup; } - if 
(ORTE_SUCCESS != (ret = orte_dss.pack(&loc_buffer, &num_sends, 1, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&loc_buffer, &num_sends, 1, OPAL_INT))) { exit_status = ret; goto cleanup; } @@ -1271,17 +1274,17 @@ static int orte_filem_rsh_permission_ask(orte_process_name_t* source, static int permission_send_done(orte_process_name_t* peer, int num_avail) { int ret, exit_status = ORTE_SUCCESS; - orte_buffer_t loc_buffer; + opal_buffer_t loc_buffer; int perm_flag = ORTE_FILEM_RSH_DONE; - OBJ_CONSTRUCT(&loc_buffer, orte_buffer_t); + OBJ_CONSTRUCT(&loc_buffer, opal_buffer_t); - if (ORTE_SUCCESS != (ret = orte_dss.pack(&loc_buffer, &perm_flag, 1, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&loc_buffer, &perm_flag, 1, OPAL_INT))) { exit_status = ret; goto cleanup; } - if (ORTE_SUCCESS != (ret = orte_dss.pack(&loc_buffer, &num_avail, 1, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&loc_buffer, &num_avail, 1, OPAL_INT))) { exit_status = ret; goto cleanup; } @@ -1300,17 +1303,17 @@ static int permission_send_done(orte_process_name_t* peer, int num_avail) { static int permission_send_num_allowed(orte_process_name_t* peer, int num_allowed) { int ret, exit_status = ORTE_SUCCESS; - orte_buffer_t loc_buffer; + opal_buffer_t loc_buffer; int perm_flag = ORTE_FILEM_RSH_ALLOW; - OBJ_CONSTRUCT(&loc_buffer, orte_buffer_t); + OBJ_CONSTRUCT(&loc_buffer, opal_buffer_t); - if (ORTE_SUCCESS != (ret = orte_dss.pack(&loc_buffer, &perm_flag, 1, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&loc_buffer, &perm_flag, 1, OPAL_INT))) { exit_status = ret; goto cleanup; } - if (ORTE_SUCCESS != (ret = orte_dss.pack(&loc_buffer, &num_allowed, 1, ORTE_INT))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&loc_buffer, &num_allowed, 1, OPAL_INT))) { exit_status = ret; goto cleanup; } diff --git a/orte/mca/gpr/base/Makefile.am b/orte/mca/gpr/base/Makefile.am deleted file mode 100644 index 95f1512198..0000000000 --- a/orte/mca/gpr/base/Makefile.am +++ /dev/null 
@@ -1,50 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Source code files - -headers += \ - base/base.h - -libmca_gpr_la_SOURCES += \ - base/gpr_base_open.c \ - base/gpr_base_close.c \ - base/gpr_base_create_value_keyval.c \ - base/gpr_base_select.c \ - base/gpr_base_simplified_put.c \ - base/gpr_base_simplified_subscribe.c \ - base/data_type_support/gpr_data_type_compare_fns.c \ - base/data_type_support/gpr_data_type_copy_fns.c \ - base/data_type_support/gpr_data_type_print_fns.c \ - base/data_type_support/gpr_data_type_release_fns.c \ - base/data_type_support/gpr_data_type_size_fns.c \ - base/data_type_support/gpr_data_type_packing_fns.c \ - base/data_type_support/gpr_data_type_unpacking_fns.c \ - base/unpack_api_response/gpr_base_unpack_cleanup.c \ - base/unpack_api_response/gpr_base_unpack_del_index.c \ - base/unpack_api_response/gpr_base_print_dump.c \ - base/unpack_api_response/gpr_base_dump_notify.c \ - base/unpack_api_response/gpr_base_unpack_arithmetic_ops.c \ - base/unpack_api_response/gpr_base_unpack_put_get.c \ - base/unpack_api_response/gpr_base_unpack_subscribe.c \ - base/pack_api_cmd/gpr_base_pack_cleanup.c \ - base/pack_api_cmd/gpr_base_pack_del_index.c \ - base/pack_api_cmd/gpr_base_pack_dump.c \ - base/pack_api_cmd/gpr_base_pack_arithmetic_ops.c \ - base/pack_api_cmd/gpr_base_pack_put_get.c \ - base/pack_api_cmd/gpr_base_pack_subscribe.c diff --git a/orte/mca/gpr/base/base.h b/orte/mca/gpr/base/base.h deleted file 
mode 100644 index cd55d38a9c..0000000000 --- a/orte/mca/gpr/base/base.h +++ /dev/null @@ -1,491 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry. - * - * The Open MPI system contains a general purpose registry for use by both - * applications and internal systems to dynamically share information. For - * speed purposes, the registry is divided into "segments", each labelled - * with an appropriate "token" string that describes its contents. Segments - * are automatically provided for the "universe" and for each MPI CommWorld. - * At this time, all segments may be accessed by any application within the universe, thus - * providing a mechanism for cross-CommWorld communications (with the requirement - * that all participating CommWorlds must reside within the same universe). In the future, - * some form of security may be provided to limit access privileges between - * segments. - * - * Within each registry segment, there exists a list of objects that have - * been "put" onto the registry. Each object must be tagged with at least - * one token, but may be tagged with as many tokens as the creator desires. - * Retrieval requests must specify the segment and at least one token, but - * can specify an arbitrary number of tokens to describe the search. The registry - * will return a list of all objects that meet the search criteria. 
- * - * Tokens are defined as character strings, thus allowing for clarity in - * the program. However, for speed purposes, tokens are translated into - * integer keys prior to storing an object. A table of token-key pairs - * is independently maintained for each registry segment. Users can obtain - * an index of tokens within a dictionary by requesting it through the orte_registry_index() - * function. - * - * The registry also provides a subscription capability whereby a caller - * can subscribe to a stored object and receive notification when various actions - * are performed on that object. Currently supported actions include modification, - * the addition of another subscriber, and deletion. Notifications are sent via - * the OOB communication channel. - * - * - */ - -#ifndef ORTE_GPR_BASE_H_ -#define ORTE_GPR_BASE_H_ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/class/opal_list.h" -#include "orte/dss/dss_types.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" -#include "orte/mca/rml/rml_types.h" - -#include "orte/mca/gpr/gpr.h" - -/* - * Global functions for MCA overall collective open and close - */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Define flag values for remote commands - */ -#define ORTE_GPR_DELETE_SEGMENT_CMD (uint8_t) 1 -#define ORTE_GPR_PUT_CMD (uint8_t) 2 -#define ORTE_GPR_DELETE_ENTRIES_CMD (uint8_t) 3 -#define ORTE_GPR_INDEX_CMD (uint8_t) 4 -#define ORTE_GPR_SUBSCRIBE_CMD (uint8_t) 5 -#define ORTE_GPR_UNSUBSCRIBE_CMD (uint8_t) 6 -#define ORTE_GPR_CANCEL_TRIGGER_CMD (uint8_t) 7 -#define ORTE_GPR_GET_CMD (uint8_t) 8 -#define ORTE_GPR_GET_CONDITIONAL_CMD (uint8_t) 9 -#define ORTE_GPR_TEST_INTERNALS_CMD (uint8_t) 10 -#define ORTE_GPR_NOTIFY_CMD (uint8_t) 11 -#define ORTE_GPR_DUMP_ALL_CMD (uint8_t) 12 -#define ORTE_GPR_DUMP_SEGMENTS_CMD (uint8_t) 13 -#define 
ORTE_GPR_DUMP_TRIGGERS_CMD (uint8_t) 14 -#define ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD (uint8_t) 15 -#define ORTE_GPR_DUMP_CALLBACKS_CMD (uint8_t) 16 -#define ORTE_GPR_ARITH_CMD (uint8_t) 17 -#define ORTE_GPR_INCREMENT_VALUE_CMD (uint8_t) 18 -#define ORTE_GPR_DECREMENT_VALUE_CMD (uint8_t) 19 -#define ORTE_GPR_COMPOUND_CMD (uint8_t) 20 -#define ORTE_GPR_CLEANUP_JOB_CMD (uint8_t) 21 -#define ORTE_GPR_CLEANUP_PROC_CMD (uint8_t) 22 -#define ORTE_GPR_DUMP_A_TRIGGER_CMD (uint8_t) 23 -#define ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD (uint8_t) 24 -#define ORTE_GPR_DUMP_SEGMENT_SIZE_CMD (uint8_t) 25 -#define ORTE_GPR_GET_NUMBER_ENTRIES_CMD (uint8_t) 26 -#define ORTE_GPR_ERROR (uint8_t)0xff - -typedef uint8_t orte_gpr_cmd_flag_t; -#define ORTE_GPR_CMD_T ORTE_UINT8 - - ORTE_DECLSPEC int orte_gpr_base_open(void); - ORTE_DECLSPEC int orte_gpr_base_select(void); - ORTE_DECLSPEC int orte_gpr_base_close(void); - - ORTE_DECLSPEC int orte_gpr_base_create_value(orte_gpr_value_t **value, - orte_gpr_addr_mode_t addr_mode, - char *segment, - orte_std_cntr_t cnt, /**< Number of keyval objects */ - orte_std_cntr_t num_tokens); - - ORTE_DECLSPEC int orte_gpr_base_create_keyval(orte_gpr_keyval_t **keyval, - char *key, - orte_data_type_t type, - void *data); - - ORTE_DECLSPEC int orte_gpr_base_put_1(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - char *key, orte_data_value_t *value); - - - ORTE_DECLSPEC int orte_gpr_base_put_N(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - orte_std_cntr_t n, char **keys, - orte_data_value_t **data_values); - - ORTE_DECLSPEC int orte_gpr_base_subscribe_1(orte_gpr_subscription_id_t *id, - char *trig_name, - char *sub_name, - orte_gpr_notify_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - char *key, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag); - - - ORTE_DECLSPEC int orte_gpr_base_subscribe_N(orte_gpr_subscription_id_t *id, - char *trig_name, - char *sub_name, - orte_gpr_notify_action_t 
action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag); - - - ORTE_DECLSPEC int orte_gpr_base_define_trigger(orte_gpr_trigger_id_t *id, - char *trig_name, - orte_gpr_trigger_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_gpr_trigger_cb_fn_t cbfunc, - void *user_tag); - - ORTE_DECLSPEC int orte_gpr_base_define_trigger_level(orte_gpr_trigger_id_t *id, - char *trig_name, - orte_gpr_trigger_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_std_cntr_t *levels, - orte_gpr_trigger_cb_fn_t cbfunc, - void *user_tag); - - /* general usage functions */ - ORTE_DECLSPEC int orte_gpr_base_pack_delete_segment(orte_buffer_t *cmd, - char *segment); - ORTE_DECLSPEC int orte_gpr_base_unpack_delete_segment(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_delete_entries(orte_buffer_t *buffer, - orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys); - ORTE_DECLSPEC int orte_gpr_base_unpack_delete_entries(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_index(orte_buffer_t *cmd, char *segment); - ORTE_DECLSPEC int orte_gpr_base_unpack_index(orte_buffer_t *cmd, int *ret, orte_std_cntr_t *cnt, - char ***index); - - ORTE_DECLSPEC int orte_gpr_base_pack_subscribe(orte_buffer_t *cmd, - orte_std_cntr_t num_subs, - orte_gpr_subscription_t **subscriptions, - orte_std_cntr_t num_trigs, orte_gpr_trigger_t **trig); - ORTE_DECLSPEC int orte_gpr_base_unpack_subscribe(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_unsubscribe(orte_buffer_t *cmd, - orte_gpr_subscription_id_t id); - ORTE_DECLSPEC int orte_gpr_base_unpack_unsubscribe(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_cancel_trigger(orte_buffer_t *cmd, - 
orte_gpr_trigger_id_t id); - ORTE_DECLSPEC int orte_gpr_base_unpack_cancel_trigger(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_put(orte_buffer_t *cmd, - orte_std_cntr_t cnt, orte_gpr_value_t **values); - ORTE_DECLSPEC int orte_gpr_base_unpack_put(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_get(orte_buffer_t *cmd, - orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys); - ORTE_DECLSPEC int orte_gpr_base_pack_get_conditional(orte_buffer_t *cmd, - orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t num_conditions, orte_gpr_keyval_t **conditions); - - ORTE_DECLSPEC int orte_gpr_base_unpack_get(orte_buffer_t *buffer, int *ret, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values); - - ORTE_DECLSPEC int orte_gpr_base_pack_dump_all(orte_buffer_t *cmd); - ORTE_DECLSPEC int orte_gpr_base_pack_dump_segments(orte_buffer_t *cmd, char *segment); - ORTE_DECLSPEC int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd, - orte_gpr_trigger_id_t start); - ORTE_DECLSPEC int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd, - orte_gpr_subscription_id_t start); - ORTE_DECLSPEC int orte_gpr_base_pack_dump_a_trigger(orte_buffer_t *cmd, - char *name, orte_gpr_trigger_id_t id); - ORTE_DECLSPEC int orte_gpr_base_pack_dump_a_subscription(orte_buffer_t *cmd, - char *name, - orte_gpr_subscription_id_t id); - ORTE_DECLSPEC int orte_gpr_base_pack_dump_callbacks(orte_buffer_t *cmd); - - ORTE_DECLSPEC int orte_gpr_base_print_dump(orte_buffer_t *buffer); - - ORTE_DECLSPEC int orte_gpr_base_dump_keyval_value(orte_buffer_t *buffer, - orte_gpr_keyval_t *iptr); - ORTE_DECLSPEC int orte_gpr_base_pack_dump_segment_size(orte_buffer_t *cmd, char *segment); - ORTE_DECLSPEC int orte_gpr_base_dump_notify_msg(orte_buffer_t *buffer, - orte_gpr_notify_message_t *msg); - ORTE_DECLSPEC int orte_gpr_base_dump_notify_data(orte_buffer_t *buffer, - orte_gpr_notify_data_t *data); - ORTE_DECLSPEC 
int orte_gpr_base_dump_value(orte_buffer_t *buffer, - orte_gpr_value_t *value); - - ORTE_DECLSPEC int orte_gpr_base_pack_cleanup_job(orte_buffer_t *buffer, - orte_jobid_t jobid); - ORTE_DECLSPEC int orte_gpr_base_unpack_cleanup_job(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_cleanup_proc(orte_buffer_t *buffer, - orte_process_name_t *proc); - ORTE_DECLSPEC int orte_gpr_base_unpack_cleanup_proc(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_arith(orte_buffer_t *cmd, - orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_dss_arith_op_t operation, - orte_data_value_t *operand); - ORTE_DECLSPEC int orte_gpr_base_unpack_arith(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_increment_value(orte_buffer_t *cmd, orte_gpr_value_t *value); - ORTE_DECLSPEC int orte_gpr_base_unpack_increment_value(orte_buffer_t *buffer, int *ret); - - ORTE_DECLSPEC int orte_gpr_base_pack_decrement_value(orte_buffer_t *cmd, orte_gpr_value_t *value); - ORTE_DECLSPEC int orte_gpr_base_unpack_decrement_value(orte_buffer_t *buffer, int *ret); - -/* GPR DATA TYPE PACKING FUNCTIONS */ -int orte_gpr_base_pack_cmd(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_subscription_id(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_trigger_id(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_notify_action(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_trigger_action(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_notify_msg_type(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int 
orte_gpr_base_pack_addr_mode(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_keyval(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_value(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_subscription(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_trigger(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_notify_data(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_gpr_base_pack_notify_msg(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -/* GPR DATA TYPE UNPACKING FUNCTIONS */ -int orte_gpr_base_unpack_cmd(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_subscription_id(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_trigger_id(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_notify_action(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_trigger_action(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_addr_mode(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_notify_msg_type(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_keyval(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_value(orte_buffer_t 
*buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_subscription(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_trigger(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_notify_data(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_gpr_base_unpack_notify_msg(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - - -/* GPR DATA TYPE RELEASE FUNCTIONS */ -void orte_gpr_base_std_release(orte_data_value_t *value); - -void orte_gpr_base_std_obj_release(orte_data_value_t *value); - -/* GPR DATA TYPE COMPARE FUNCTIONS */ -int orte_gpr_base_compare_cmd(orte_gpr_cmd_flag_t *value1, - orte_gpr_cmd_flag_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_subscription_id(orte_gpr_subscription_id_t *value1, - orte_gpr_subscription_id_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_trigger_id(orte_gpr_trigger_id_t *value1, - orte_gpr_trigger_id_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_notify_action(orte_gpr_notify_action_t *value1, - orte_gpr_notify_action_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_trigger_action(orte_gpr_trigger_action_t *value1, - orte_gpr_trigger_action_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_notify_msg_type(orte_gpr_notify_msg_type_t *value1, - orte_gpr_notify_msg_type_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_addr_mode(orte_gpr_addr_mode_t *value1, - orte_gpr_addr_mode_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_keyval(orte_gpr_keyval_t *value1, - orte_gpr_keyval_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_gpr_value(orte_gpr_value_t *value1, - orte_gpr_value_t *value2, - orte_data_type_t type); - -int 
orte_gpr_base_compare_subscription(orte_gpr_subscription_t *value1, - orte_gpr_subscription_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_trigger(orte_gpr_trigger_t *value1, - orte_gpr_trigger_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_notify_data(orte_gpr_notify_data_t *value1, - orte_gpr_notify_data_t *value2, - orte_data_type_t type); - -int orte_gpr_base_compare_notify_msg(orte_gpr_notify_message_t *value1, - orte_gpr_notify_message_t *value2, - orte_data_type_t type); - -/* GPR DATA TYPE COPY FUNCTIONS */ -int orte_gpr_base_copy_cmd(orte_gpr_cmd_flag_t **dest, orte_gpr_cmd_flag_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_subscription_id(orte_gpr_subscription_id_t **dest, orte_gpr_subscription_id_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_trigger_id(orte_gpr_trigger_id_t **dest, orte_gpr_trigger_id_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_notify_action(orte_gpr_notify_action_t **dest, orte_gpr_notify_action_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_trigger_action(orte_gpr_trigger_action_t **dest, orte_gpr_trigger_action_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_notify_msg_type(orte_gpr_notify_msg_type_t **dest, orte_gpr_notify_msg_type_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_addr_mode(orte_gpr_addr_mode_t **dest, orte_gpr_addr_mode_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_keyval(orte_gpr_keyval_t **dest, orte_gpr_keyval_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_gpr_value(orte_gpr_value_t **dest, orte_gpr_value_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_subscription(orte_gpr_subscription_t **dest, orte_gpr_subscription_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_trigger(orte_gpr_trigger_t **dest, orte_gpr_trigger_t *src, orte_data_type_t type); - -int orte_gpr_base_copy_notify_data(orte_gpr_notify_data_t **dest, orte_gpr_notify_data_t *src, orte_data_type_t type); 
- -int orte_gpr_base_copy_notify_msg(orte_gpr_notify_message_t **dest, orte_gpr_notify_message_t *src, orte_data_type_t type); - -/* GPR DATA TYPE SIZE FUNCTIONS */ -int orte_gpr_base_std_size(size_t *size, void *src, orte_data_type_t type); - -int orte_gpr_base_size_keyval(size_t *size, orte_gpr_keyval_t *src, orte_data_type_t type); - -int orte_gpr_base_size_gpr_value(size_t *size, orte_gpr_value_t *src, orte_data_type_t type); - -int orte_gpr_base_size_subscription(size_t *size, orte_gpr_subscription_t *src, orte_data_type_t type); - -int orte_gpr_base_size_trigger(size_t *size, orte_gpr_trigger_t *src, orte_data_type_t type); - -int orte_gpr_base_size_notify_data(size_t *size, orte_gpr_notify_data_t *src, orte_data_type_t type); - -int orte_gpr_base_size_notify_msg(size_t *size, orte_gpr_notify_message_t *src, orte_data_type_t type); - -/* GPR DATA TYPE PRINT FUNCTIONS */ -int orte_gpr_base_std_print(char **output, char *prefix, void *src, orte_data_type_t type); - -int orte_gpr_base_print_keyval(char **output, char *prefix, orte_gpr_keyval_t *src, orte_data_type_t type); - -int orte_gpr_base_print_gpr_value(char **output, char *prefix, orte_gpr_value_t *value, orte_data_type_t type); - -int orte_gpr_base_print_subscription(char **output, char *prefix, orte_gpr_subscription_t *sub, orte_data_type_t type); - -int orte_gpr_base_print_trigger(char **output, char *prefix, orte_gpr_trigger_t *trig, orte_data_type_t type); - -int orte_gpr_base_print_notify_data(char **output, char *prefix, orte_gpr_notify_data_t *data, orte_data_type_t type); - -int orte_gpr_base_print_notify_msg(char **output, char *prefix, orte_gpr_notify_message_t *msg, orte_data_type_t type); - - -/* - * globals that might be needed inside the gpr - */ -ORTE_DECLSPEC extern int orte_gpr_base_output; -ORTE_DECLSPEC extern size_t orte_gpr_array_max_size, orte_gpr_array_block_size; -ORTE_DECLSPEC extern bool orte_gpr_base_selected; -ORTE_DECLSPEC extern opal_list_t 
orte_gpr_base_components_available; -ORTE_DECLSPEC extern mca_gpr_base_component_t orte_gpr_base_selected_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/gpr/base/data_type_support/gpr_data_type_compare_fns.c b/orte/mca/gpr/base/data_type_support/gpr_data_type_compare_fns.c deleted file mode 100755 index 06a6a47887..0000000000 --- a/orte/mca/gpr/base/data_type_support/gpr_data_type_compare_fns.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -/* - * NUMERIC COMPARE FUNCTIONS - */ -int orte_gpr_base_compare_cmd(orte_gpr_cmd_flag_t *value1, - orte_gpr_cmd_flag_t *value2, - orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_subscription_id(orte_gpr_subscription_id_t *value1, - orte_gpr_subscription_id_t *value2, - orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_trigger_id(orte_gpr_trigger_id_t *value1, - orte_gpr_trigger_id_t *value2, - orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return 
ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_notify_action(orte_gpr_notify_action_t *value1, - orte_gpr_notify_action_t *value2, - orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_trigger_action(orte_gpr_trigger_action_t *value1, - orte_gpr_trigger_action_t *value2, - orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_notify_msg_type(orte_gpr_notify_msg_type_t *value1, - orte_gpr_notify_msg_type_t *value2, - orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_addr_mode(orte_gpr_addr_mode_t *value1, - orte_gpr_addr_mode_t *value2, - orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -/* NON-NUMERIC COMPARE FUNCTIONS */ - - -int orte_gpr_base_compare_keyval(orte_gpr_keyval_t *value1, - orte_gpr_keyval_t *value2, - orte_data_type_t type) -{ - int rc; - - /* check to see if the keys are the same */ - if (0 != (rc = strcmp(value1->key, value2->key))) { - if (0 < rc) return ORTE_VALUE1_GREATER; - return ORTE_VALUE2_GREATER; - } - - /* okay, keys are the same - compare the values using their native comparators */ - return orte_dss.compare(value1->value, value2->value, ORTE_DATA_VALUE); - -} - -int orte_gpr_base_compare_gpr_value(orte_gpr_value_t *value1, - orte_gpr_value_t *value2, - orte_data_type_t type) -{ - /* no real way to do this right now */ - ORTE_ERROR_LOG(ORTE_ERR_COMPARE_FAILURE); - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_subscription(orte_gpr_subscription_t *value1, - orte_gpr_subscription_t *value2, - 
orte_data_type_t type) -{ - /* no real way to do this right now */ - ORTE_ERROR_LOG(ORTE_ERR_COMPARE_FAILURE); - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_trigger(orte_gpr_trigger_t *value1, - orte_gpr_trigger_t *value2, - orte_data_type_t type) -{ - /* no real way to do this right now */ - ORTE_ERROR_LOG(ORTE_ERR_COMPARE_FAILURE); - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_notify_data(orte_gpr_notify_data_t *value1, - orte_gpr_notify_data_t *value2, - orte_data_type_t type) -{ - /* no real way to do this right now */ - ORTE_ERROR_LOG(ORTE_ERR_COMPARE_FAILURE); - return ORTE_EQUAL; -} - -int orte_gpr_base_compare_notify_msg(orte_gpr_notify_message_t *value1, - orte_gpr_notify_message_t *value2, - orte_data_type_t type) -{ - /* no real way to do this right now */ - ORTE_ERROR_LOG(ORTE_ERR_COMPARE_FAILURE); - return ORTE_EQUAL; -} diff --git a/orte/mca/gpr/base/data_type_support/gpr_data_type_copy_fns.c b/orte/mca/gpr/base/data_type_support/gpr_data_type_copy_fns.c deleted file mode 100755 index e51220f6f0..0000000000 --- a/orte/mca/gpr/base/data_type_support/gpr_data_type_copy_fns.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -/* - * COPY FUNCTIONS FOR EVERYTHING NON-STRUCTURED - */ -int orte_gpr_base_copy_cmd(orte_gpr_cmd_flag_t **dest, orte_gpr_cmd_flag_t *src, orte_data_type_t type) -{ - int rc; - - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)dest, (void*)src, ORTE_GPR_CMD_T))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_copy_subscription_id(orte_gpr_subscription_id_t **dest, orte_gpr_subscription_id_t *src, orte_data_type_t type) -{ - int rc; - - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)dest, (void*)src, ORTE_GPR_SUBSCRIPTION_ID_T))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_copy_trigger_id(orte_gpr_trigger_id_t **dest, orte_gpr_trigger_id_t *src, orte_data_type_t type) -{ - int rc; - - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)dest, (void*)src, ORTE_GPR_TRIGGER_ID_T))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_copy_notify_action(orte_gpr_notify_action_t **dest, orte_gpr_notify_action_t *src, orte_data_type_t type) -{ - int rc; - - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)dest, (void*)src, ORTE_GPR_NOTIFY_ACTION_T))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_copy_trigger_action(orte_gpr_trigger_action_t **dest, orte_gpr_trigger_action_t *src, orte_data_type_t type) -{ - int rc; - - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)dest, (void*)src, ORTE_GPR_TRIGGER_ACTION_T))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_copy_notify_msg_type(orte_gpr_notify_msg_type_t **dest, orte_gpr_notify_msg_type_t *src, orte_data_type_t type) -{ - int rc; - - if (ORTE_SUCCESS != 
(rc = orte_dss.copy((void**)dest, (void*)src, ORTE_GPR_NOTIFY_MSG_TYPE_T))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_copy_addr_mode(orte_gpr_addr_mode_t **dest, orte_gpr_addr_mode_t *src, orte_data_type_t type) -{ - int rc; - - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)dest, (void*)src, ORTE_GPR_ADDR_MODE_T))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - - -/* COPY FUNCTIONS FOR COMPLEX TYPES */ -/* KEYVAL */ -int orte_gpr_base_copy_keyval(orte_gpr_keyval_t **dest, orte_gpr_keyval_t *src, orte_data_type_t type) -{ - orte_gpr_keyval_t *kval; - int rc; - - /* create the new object */ - kval = OBJ_NEW(orte_gpr_keyval_t); - if (NULL == kval) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - kval->value = OBJ_NEW(orte_data_value_t); - if (NULL == kval->value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (NULL != src->key) { - kval->key = strdup(src->key); - } - - /* if there is data in the src, copy it */ - if (NULL != src->value) { - kval->value->type = src->value->type; - - /* copy the data itself - use the appropriate copy function here */ - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(kval->value->data), src->value->data, src->value->type))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(kval); - *dest = NULL; - return rc; - } - } - - *dest = kval; - return ORTE_SUCCESS; -} - -/* VALUE */ -int orte_gpr_base_copy_gpr_value(orte_gpr_value_t **dest, orte_gpr_value_t *src, orte_data_type_t type) -{ - int rc; - orte_gpr_keyval_t **kvals; - char **tokens; - orte_std_cntr_t i; - - /* create the new object */ - *dest = OBJ_NEW(orte_gpr_value_t); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* copy the data into it */ - (*dest)->addr_mode = src->addr_mode; - if (NULL != src->segment) { - (*dest)->segment = strdup(src->segment); - } - (*dest)->cnt = 
src->cnt; - ; - /* copy the keyvals - use the appropriate copy function here */ - if (0 < src->cnt) { - kvals = (orte_gpr_keyval_t**)malloc(src->cnt * sizeof(orte_gpr_keyval_t*)); /* allocate space for the pointers */ - if (NULL == kvals) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(*dest); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (*dest)->keyvals = kvals; - for (i=0; i < src->cnt; i++) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_copy_keyval(&kvals[i], - src->keyvals[i], ORTE_GPR_KEYVAL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*dest); - return rc; - } - } - } - - (*dest)->num_tokens = src->num_tokens; - /* copy the tokens - use the appropriate copy function here */ - if (0 < src->num_tokens) { - tokens = (char**)malloc(src->num_tokens * sizeof(char*)); /* allocate space for the pointers */ - if (NULL == tokens) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(*dest); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (*dest)->tokens = tokens; - for (i=0; i < src->num_tokens; i++) { - tokens[i] = strdup(src->tokens[i]); - } - } - - return ORTE_SUCCESS; -} - - -/* SUBSCRIPTION */ -int orte_gpr_base_copy_subscription(orte_gpr_subscription_t **dest, orte_gpr_subscription_t *src, orte_data_type_t type) -{ - orte_std_cntr_t i; - orte_gpr_value_t **values; - int rc; - - /* create the new object */ - *dest = OBJ_NEW(orte_gpr_subscription_t); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (NULL != src->name) { - (*dest)->name = strdup(src->name); - } - (*dest)->id = src->id; - (*dest)->action = src->action; - (*dest)->cnt = src->cnt; - - /* copy the values - use the appropriate copy function here */ - if (0 < src->cnt) { - values = (orte_gpr_value_t**)malloc(src->cnt * sizeof(orte_gpr_value_t*)); - if (NULL == values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(*dest); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (*dest)->values = values; - for (i=0; i < src->cnt; i++) { - if 
(ORTE_SUCCESS != (rc = orte_gpr_base_copy_gpr_value(&values[i], src->values[i], ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*dest); - return rc; - } - } - } - - (*dest)->cbfunc = src->cbfunc; - (*dest)->user_tag = src->user_tag; - - return ORTE_SUCCESS; -} - -/* TRIGGER */ -int orte_gpr_base_copy_trigger(orte_gpr_trigger_t **dest, orte_gpr_trigger_t *src, orte_data_type_t type) -{ - orte_std_cntr_t i; - orte_gpr_value_t **values; - int rc; - - /* create the new object */ - *dest = OBJ_NEW(orte_gpr_trigger_t); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (NULL != src->name) { - (*dest)->name = strdup(src->name); - } - (*dest)->id = src->id; - (*dest)->action = src->action; - (*dest)->cnt = src->cnt; - - /* copy the values - use the appropriate copy function here */ - if (0 < src->cnt) { - values = (orte_gpr_value_t**)malloc(src->cnt * sizeof(orte_gpr_value_t*)); - if (NULL == values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(*dest); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (*dest)->values = values; - for (i=0; i < src->cnt; i++) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_copy_gpr_value(&values[i], src->values[i], ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*dest); - return rc; - } - } - } - - (*dest)->cbfunc = src->cbfunc; - (*dest)->user_tag = src->user_tag; - - return ORTE_SUCCESS; -} - -/* NOTIFY DATA */ -int orte_gpr_base_copy_notify_data(orte_gpr_notify_data_t **dest, orte_gpr_notify_data_t *src, orte_data_type_t type) -{ - orte_std_cntr_t j, k, index; - orte_gpr_value_t **val, *ptr; - int rc; - - /* create the new object */ - *dest = OBJ_NEW(orte_gpr_notify_data_t); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* copy the data to it */ - if (NULL != src->target) { - (*dest)->target = strdup(src->target); - } - (*dest)->id = src->id; - (*dest)->remove = src->remove; - (*dest)->cnt = 
src->cnt; - - /* copy the values */ - val = (orte_gpr_value_t**)(src->values)->addr; - for (j=0, k=0; j < src->cnt && - k < (src->values)->size; k++) { - if (NULL != val[k]) { - j++; - /* copy the value object */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_copy_gpr_value(&ptr, val[k], ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*dest); - return rc; - } - /* ...and add it to the pointer array */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&index, (*dest)->values, ptr))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*dest); - return rc; - } - } - } - - return ORTE_SUCCESS; -} - -/* NOTIFY MSG */ -int orte_gpr_base_copy_notify_msg(orte_gpr_notify_message_t **dest, orte_gpr_notify_message_t *src, orte_data_type_t type) -{ - orte_std_cntr_t j, k, index; - orte_gpr_notify_data_t **val, *ptr; - int rc; - - /* create the new object */ - *dest = OBJ_NEW(orte_gpr_notify_message_t); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - (*dest)->msg_type = src->msg_type; - if (NULL != src->target) { - (*dest)->target = strdup(src->target); - } - (*dest)->id = src->id; - (*dest)->remove = src->remove; - (*dest)->cnt = src->cnt; - - /* copy the notify data entries */ - val = (orte_gpr_notify_data_t**)(src->data)->addr; - for (j=0, k=0; j < src->cnt && - k < (src->data)->size; k++) { - if (NULL != val[k]) { - j++; - /* copy the data object */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_copy_notify_data(&ptr, val[k], ORTE_GPR_NOTIFY_DATA))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*dest); - return rc; - } - /* ...and add it to the pointer array */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&index, (*dest)->data, ptr))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*dest); - return rc; - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/data_type_support/gpr_data_type_packing_fns.c b/orte/mca/gpr/base/data_type_support/gpr_data_type_packing_fns.c deleted file mode 100644 index 001686ef9b..0000000000 --- 
a/orte/mca/gpr/base/data_type_support/gpr_data_type_packing_fns.c +++ /dev/null @@ -1,510 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "opal/util/trace.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss_internal.h" - -#include "orte/mca/gpr/base/base.h" - -/* - * GPR CMD - */ -int orte_gpr_base_pack_cmd(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_GPR_CMD_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * SUBSCRIPTION ID - */ -int orte_gpr_base_pack_subscription_id(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_GPR_SUBSCRIPTION_ID_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * TRIGGER ID - */ -int orte_gpr_base_pack_trigger_id(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_GPR_TRIGGER_ID_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * NOTIFY ACTION - */ -int orte_gpr_base_pack_notify_action(orte_buffer_t *buffer, const 
void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_GPR_NOTIFY_ACTION_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * TRIGGER ACTION - */ -int orte_gpr_base_pack_trigger_action(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_GPR_TRIGGER_ACTION_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * ADDR MODE - */ -int orte_gpr_base_pack_addr_mode(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_GPR_ADDR_MODE_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * NOTIFY MSG TYPE - */ -int orte_gpr_base_pack_notify_msg_type(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_GPR_NOTIFY_MSG_TYPE_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - - -/* - * KEYVAL - */ -int orte_gpr_base_pack_keyval(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_keyval_t **keyval; - orte_std_cntr_t i; - - OPAL_TRACE(4); - - /* array of pointers to keyval objects - need to pack the - objects */ - keyval = (orte_gpr_keyval_t**) src; - - for (i=0; i < num_vals; i++) { - /* pack the key */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(keyval[i]->key)), 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the data value */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(keyval[i]->value), 1, ORTE_DATA_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - return ORTE_SUCCESS; -} 
- -/* - * VALUE - */ -int orte_gpr_base_pack_value(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_value_t **values; - orte_std_cntr_t i; - - OPAL_TRACE(4); - - /* array of pointers to value objects - need to pack the objects */ - values = (orte_gpr_value_t**) src; - for (i=0; iaddr_mode)), 1, ORTE_GPR_ADDR_MODE))) { - ORTE_ERROR_LOG(rc); - return ORTE_ERROR; - } - - /* pack the segment name */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(values[i]->segment)), 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return ORTE_ERROR; - } - - /* pack the number of tokens so we can read it for unpacking */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(values[i]->num_tokens)), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return ORTE_ERROR; - } - - /* if there are tokens, pack them */ - if (0 < values[i]->num_tokens) { - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)((values[i]->tokens)), values[i]->num_tokens, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return ORTE_ERROR; - } - } - - /* pack the number of keyval pairs so we can read it for unpacking */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(values[i]->cnt)), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return ORTE_ERROR; - } - - /* if there are keyval pairs, pack them */ - if (0 < values[i]->cnt) { - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)((values[i]->keyvals)), values[i]->cnt, ORTE_GPR_KEYVAL))) { - ORTE_ERROR_LOG(rc); - return ORTE_ERROR; - } - } - } - - return ORTE_SUCCESS; -} - -/* - * SUBSCRIPTION - */ -int orte_gpr_base_pack_subscription(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_subscription_t **subs; - orte_std_cntr_t i; - - OPAL_TRACE(4); - - /* array of pointers to subscription objects - need to pack the objects */ - subs = (orte_gpr_subscription_t**) src; - for 
(i=0; iname)), 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the subscription id */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(subs[i]->id)), 1, ORTE_GPR_SUBSCRIPTION_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the notify action */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(subs[i]->action)), 1, ORTE_GPR_NOTIFY_ACTION))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the number of values so we can read it for unpacking */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(subs[i]->cnt)), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there are values, pack them */ - if (0 < subs[i]->cnt) { - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)((subs[i]->values)), subs[i]->cnt, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* skip the pointers for cb_func and user_tag */ - } - - return ORTE_SUCCESS; -} - -/* - * TRIGGER - */ -int orte_gpr_base_pack_trigger(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_trigger_t **trigs; - orte_std_cntr_t i; - - OPAL_TRACE(4); - - /* array of pointers to trigger objects - need to pack the objects */ - trigs = (orte_gpr_trigger_t**) src; - for (i=0; iname)), 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the trigger id */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(trigs[i]->id)), 1, ORTE_GPR_TRIGGER_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the trigger action */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(trigs[i]->action)), 1, ORTE_GPR_TRIGGER_ACTION))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the number of values so we can read it for unpacking */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(trigs[i]->cnt)), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - 
- /* if there are values, pack the values */ - if (0 < trigs[i]->cnt) { - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)((trigs[i]->values)), trigs[i]->cnt, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - - return ORTE_SUCCESS; -} - -/* - * NOTIFY DATA - */ -int orte_gpr_base_pack_notify_data(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_notify_data_t **data; - orte_gpr_value_t **values; - orte_std_cntr_t i, j, k; - - OPAL_TRACE(4); - - /* array of pointers to notify data objects - need to pack the objects */ - data = (orte_gpr_notify_data_t**) src; - - for (i=0; itarget)), 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the subscription number */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(data[i]->id)), 1, ORTE_GPR_SUBSCRIPTION_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the remove flag */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(data[i]->remove)), 1, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the number of values so we can read it for unpacking */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(data[i]->cnt)), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there are values, pack the values */ - if (0 < data[i]->cnt) { - values = (orte_gpr_value_t**)(data[i]->values)->addr; - for (j=0, k=0; k < data[i]->cnt && - j < (data[i]->values)->size; j++) { - if (NULL != values[j]) { - k++; - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &values[j], - 1, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - } - - return ORTE_SUCCESS; -} - - -/* - * NOTIFY MSG - */ -int orte_gpr_base_pack_notify_msg(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_notify_message_t **msg; - orte_gpr_notify_data_t **data; - 
orte_std_cntr_t i, j, k; - - OPAL_TRACE(4); - - /* array of messages */ - msg = (orte_gpr_notify_message_t**) src; - - for (i=0; imsg_type)), 1, ORTE_GPR_NOTIFY_MSG_TYPE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the trigger name */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(msg[i]->target)), 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the trigger number */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(msg[i]->id)), 1, ORTE_GPR_TRIGGER_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the remove flag */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(msg[i]->remove)), 1, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the number of datagrams so we can read it for unpacking */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(msg[i]->cnt)), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there are datagrams, pack them */ - if (0 < msg[i]->cnt) { - /* array of pointers to notify data objects - need to pack the objects. - * to do this, we assume that the array objects are continguous - * in the pointer array. the pointer array itself does not - * guarantee this property - we are exploiting, however, our knowledge - * of how these messages are constructed. 
- */ - data = (orte_gpr_notify_data_t**)(msg[i]->data)->addr; - for (j=0, k=0; k < msg[i]->cnt && - j < (msg[i]->data)->size; j++) { - if (NULL != data[j]) { - k++; - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(data[j]), - 1, ORTE_GPR_NOTIFY_DATA))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/data_type_support/gpr_data_type_print_fns.c b/orte/mca/gpr/base/data_type_support/gpr_data_type_print_fns.c deleted file mode 100755 index b6a18742ce..0000000000 --- a/orte/mca/gpr/base/data_type_support/gpr_data_type_print_fns.c +++ /dev/null @@ -1,570 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -static void orte_gpr_base_quick_print(char **output, char *type_name, char *prefix, void *src, orte_std_cntr_t src_size); - -/* - * STANDARD PRINT FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ -int orte_gpr_base_std_print(char **output, char *prefix, void *src, orte_data_type_t type) -{ - /* set default result */ - *output = NULL; - - switch(type) { - case ORTE_GPR_CMD: - orte_gpr_base_quick_print(output, "ORTE_GPR_CMD", prefix, src, sizeof(orte_gpr_cmd_flag_t)); - break; - - case ORTE_GPR_SUBSCRIPTION_ID: - orte_gpr_base_quick_print(output, "ORTE_GPR_SUBSCRIPTION_ID", prefix, src, sizeof(orte_gpr_subscription_id_t)); - break; - - case ORTE_GPR_TRIGGER_ID: - orte_gpr_base_quick_print(output, "ORTE_GPR_TRIGGER_ID", prefix, src, sizeof(orte_gpr_trigger_id_t)); - break; - - case ORTE_GPR_NOTIFY_ACTION: - orte_gpr_base_quick_print(output, "ORTE_GPR_NOTIFY_ACTION", prefix, src, sizeof(orte_gpr_notify_action_t)); - break; - - case ORTE_GPR_TRIGGER_ACTION: - orte_gpr_base_quick_print(output, "ORTE_GPR_TRIGGER_ACTION", prefix, src, sizeof(orte_gpr_trigger_action_t)); - break; - - case ORTE_GPR_NOTIFY_MSG_TYPE: - orte_gpr_base_quick_print(output, "ORTE_GPR_NOTIFY_MSG_TYPE", prefix, src, sizeof(orte_gpr_notify_msg_type_t)); - break; - - case ORTE_GPR_ADDR_MODE: - orte_gpr_base_quick_print(output, "ORTE_GPR_ADDR_MODE", prefix, src, sizeof(orte_gpr_addr_mode_t)); - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - return ORTE_SUCCESS; -} - -/* PRINT FUNCTIONS FOR COMPLEX TYPES */ - -/* KEYVAL */ -int orte_gpr_base_print_keyval(char **output, char *prefix, orte_gpr_keyval_t *src, orte_data_type_t type) -{ - char *tmp, *tmp2, *pfx, *prefx; - int rc; 
- - /* set default result */ - *output = NULL; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - if (NULL == src->key) { - asprintf(&tmp, "%sData for keyval: NULL key\n", prefx); - } else { - asprintf(&tmp, "%sData for keyval: Key: %s\n", prefx, src->key); - } - - asprintf(&pfx, "%s\t", prefx); - - if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->value, ORTE_DATA_VALUE))) { - ORTE_ERROR_LOG(rc); - free(pfx); - free(tmp); - return rc; - } - - asprintf(output, "%s%s\n", tmp, tmp2); - free(pfx); - free(tmp); - free(tmp2); - - return ORTE_SUCCESS; -} - -/* VALUE */ -int orte_gpr_base_print_gpr_value(char **output, char *prefix, orte_gpr_value_t *value, orte_data_type_t type) -{ - orte_gpr_addr_mode_t addr; - char *tmp, *tmp2, *tmp3, *pfx, *prefx; - orte_std_cntr_t j; - int rc; - - /* set default result */ - *output = NULL; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - if (NULL == value->segment) { - asprintf(&tmp, "%sValue from NULL segment name - %lu keyvals", - prefx, (unsigned long) value->cnt); - } else { - asprintf(&tmp, "%sValue from segment %s with %lu keyvals", - prefx, value->segment, (unsigned long) value->cnt); - } - - if (NULL == value->tokens) { - asprintf(&tmp2, "%s\n%s\tNULL tokens (wildcard)", tmp, prefx); - free(tmp); - } else { - asprintf(&tmp2, "%s\n%s\t%lu Tokens returned", tmp, prefx, (unsigned long) value->num_tokens); - free(tmp); - for (j=0; j < value->num_tokens; j++) { - if (NULL == value->tokens[j]) { - asprintf(&tmp, "%s\n%s\t\tToken %lu: NULL token pointer", tmp2, prefx, (unsigned long) j); - } else { - asprintf(&tmp, "%s\n%s\t\tToken %lu: %s", tmp2, prefx, (unsigned long) j, value->tokens[j]); - } - free(tmp2); - tmp2 = tmp; - } - } - - addr = value->addr_mode; - asprintf(&tmp, "%s\n%s\tToken addressing mode:", tmp2, prefx); - free(tmp2); - - if (0x0000 == (0x00ff & addr)) { - asprintf(&tmp2, "%s\n%s\t\tNONE\n", tmp, 
prefx); - free(tmp); - tmp = tmp2; - } else { - if (ORTE_GPR_TOKENS_AND & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TOKENS_AND", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_TOKENS_OR & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TOKENS_OR", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_TOKENS_XAND & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TOKENS_XAND", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_TOKENS_XOR & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TOKENS_XOR", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_TOKENS_NOT & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TOKENS_NOT", tmp, prefx); - free(tmp); - tmp = tmp2; - } - } - - asprintf(&tmp2, "%s\n%s\tKey addressing mode:", tmp, prefx); - free(tmp); - tmp = tmp2; - - if (0x0000 == (0xff00 & addr)) { - asprintf(&tmp2, "%s\n%s\t\tNONE\n", tmp, prefx); - free(tmp); - tmp = tmp2; - } else { - if (ORTE_GPR_KEYS_AND & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_KEYS_AND", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_KEYS_OR & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_KEYS_OR", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_KEYS_XAND & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_KEYS_XAND", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_KEYS_XOR & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_KEYS_XOR", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_KEYS_NOT & addr) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_KEYS_NOT", tmp, prefx); - free(tmp); - tmp = tmp2; - } - } - - /* indent another level for keyvals */ - asprintf(&pfx, "%s\t", prefx); - for (j=0; j < value->cnt; j++) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_keyval(&tmp2, pfx, - (orte_gpr_keyval_t*)(value->keyvals[j]), ORTE_GPR_KEYVAL))) { - ORTE_ERROR_LOG(rc); - free(tmp); - free(pfx); - return rc; - } - asprintf(&tmp3, "%s\n%s", tmp, tmp2); - free(tmp); - free(tmp2); - tmp = tmp3; - } - - /* put results in final location */ - 
*output = tmp; - - return ORTE_SUCCESS; -} - - -/* SUBSCRIPTION */ -int orte_gpr_base_print_subscription(char **output, char *prefix, orte_gpr_subscription_t *sub, orte_data_type_t type) -{ - int rc; - char *tmp, *tmp2, *tmp3, *pfx, *prefx; - orte_std_cntr_t j; - - /* set default result */ - *output = NULL; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - if (NULL == sub->name) { - asprintf(&tmp, "%sSubscription - NO NAME\tid: %lu", prefx, (unsigned long) sub->id); - } else { - asprintf(&tmp, "%sSubscription - Name: %s\tid: %lu", prefx, sub->name, (unsigned long) sub->id); - } - - asprintf(&tmp2, "%s\n%s\tAction flags:", tmp, prefx); - free(tmp); - tmp = tmp2; - - if (ORTE_GPR_NOTIFY_NONE == sub->action) { - asprintf(&tmp2, "%s\n%s\t\tNONE\n", tmp, prefx); - free(tmp); - tmp = tmp2; - } else { - if (ORTE_GPR_NOTIFY_VALUE_CHG_TO & sub->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_NOTIFY_VALUE_CHG_TO", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_NOTIFY_VALUE_CHG_FRM & sub->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_NOTIFY_VALUE_CHG_FRM", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_NOTIFY_ADD_ENTRY & sub->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_NOTIFY_ADD_ENTRY", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_NOTIFY_DEL_ENTRY & sub->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_NOTIFY_DEL_ENTRY", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_NOTIFY_PRE_EXISTING & sub->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_NOTIFY_PRE_EXISTING", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & sub->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_NOTIFY_STARTS_AFTER_TRIG", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & sub->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_NOTIFY_DELETE_AFTER_TRIG", tmp, prefx); - free(tmp); - tmp = tmp2; - } - } - - /* indent another level for 
values */ - asprintf(&pfx, "%s\t", prefx); - for (j=0; j < sub->cnt; j++) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_gpr_value(&tmp2, pfx, (orte_gpr_value_t*)(sub->values[j]), ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - free(tmp); - free(pfx); - return rc; - } - asprintf(&tmp3, "%s\n%s", tmp, tmp2); - free(tmp); - free(tmp2); - tmp = tmp3; - } - - /* put results in final location */ - *output = tmp; - - return ORTE_SUCCESS; -} - -/* TRIGGER */ -int orte_gpr_base_print_trigger(char **output, char *prefix, orte_gpr_trigger_t *trig, orte_data_type_t type) -{ - int rc; - char *tmp, *tmp2, *tmp3, *pfx, *prefx; - orte_std_cntr_t j; - - /* set default result */ - *output = NULL; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - if (NULL == trig->name) { - asprintf(&tmp, "%sTrigger - NO NAME\tid: %lu", prefx, (unsigned long) trig->id); - } else { - asprintf(&tmp, "%sTrigger - Name: %s\tid: %lu", prefx, trig->name, (unsigned long) trig->id); - } - - asprintf(&tmp2, "%s\n%s\tAction flags:", tmp, prefx); - free(tmp); - tmp = tmp2; - - if (0x00 == trig->action) { - asprintf(&tmp2, "%s\n%s\t\tNONE\n", tmp, prefx); - free(tmp); - tmp = tmp2; - } else { - if (ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS & trig->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_TRIG_ONE_SHOT & trig->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TRIG_ONE_SHOT", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME & trig->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TRIG_ROUTE_DATA_THRU_ME", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_TRIG_AT_LEVEL & trig->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TRIG_AT_LEVEL", tmp, prefx); - free(tmp); - tmp = tmp2; - } - if (ORTE_GPR_TRIG_CMP_LEVELS & trig->action) { - asprintf(&tmp2, "%s\n%s\t\tORTE_GPR_TRIG_CMP_LEVELS", tmp, prefx); - free(tmp); - tmp = tmp2; - } - } - - /* 
indent another level for values */ - asprintf(&pfx, "%s\t", prefx); - for (j=0; j < trig->cnt; j++) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_gpr_value(&tmp2, pfx, (orte_gpr_value_t*)(trig->values[j]), ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - free(tmp); - free(pfx); - return rc; - } - asprintf(&tmp3, "%s\n%s", tmp, tmp2); - free(tmp); - free(tmp2); - tmp = tmp3; - } - - /* put results in final location */ - *output = tmp; - - return ORTE_SUCCESS; -} - -/* NOTIFY DATA */ -int orte_gpr_base_print_notify_data(char **output, char *prefix, orte_gpr_notify_data_t *data, orte_data_type_t type) -{ - char *tmp, *tmp2, *tmp3, *pfx, *prefx; - orte_std_cntr_t i, j; - orte_gpr_value_t **values; - int rc; - - /* set default result */ - *output = NULL; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - if (NULL != data->target) { - asprintf(&tmp, "%sNotify Data: %lu values going to subscription target %s", prefx, - (unsigned long) data->cnt, data->target); - } else { - asprintf(&tmp, "%sNotify Data: %lu values going to subscription num %lu", prefx, - (unsigned long) data->cnt, (unsigned long) data->id); - } - - values = (orte_gpr_value_t**)(data->values)->addr; - if (0 < data->cnt) { - /* indent another level for values */ - asprintf(&pfx, "%s\t", prefx); - - for (i=0, j=0; j < data->cnt && - i < (data->values)->size; i++) { - if (NULL != values[i]) { - j++; - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_gpr_value(&tmp2, pfx, (orte_gpr_value_t*)values[i], ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - free(tmp); - return rc; - } - asprintf(&tmp3, "%s\n%s", tmp, tmp2); - free(tmp2); - tmp = tmp3; - } - } - } - - /* put results in final location */ - *output = tmp; - - return ORTE_SUCCESS; -} - -/* NOTIFY MSG */ -int orte_gpr_base_print_notify_msg(char **output, char *prefix, orte_gpr_notify_message_t *msg, orte_data_type_t type) -{ - char *tmp, *tmp2, *tmp3, *pfx, *prefx; - orte_std_cntr_t i, j; - orte_gpr_notify_data_t 
**data; - int rc; - - /* set default result */ - *output = NULL; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - if (ORTE_GPR_TRIGGER_MSG == msg->msg_type) { - asprintf(&tmp, "%sTRIGGER message", prefx); - } else if (ORTE_GPR_SUBSCRIPTION_MSG == msg->msg_type) { - asprintf(&tmp, "%sSUBSCRIPTION message", prefx); - } - - if (NULL == msg->target) { - asprintf(&tmp2, "%s\n%s\tTrigger target: NULL", tmp, prefx); - } else { - asprintf(&tmp2, "%s\n%s\tTrigger target: %s", tmp, prefx, msg->target); - } - free(tmp); - - asprintf(&tmp, "%s\n%s\tTrigger id: %lu", tmp2, prefx, (unsigned long)msg->id); - free(tmp2); - - asprintf(&tmp2, "%s\n%s\t%lu Notify data structures in message", tmp2, prefx, - (unsigned long) msg->cnt); - free(tmp); - tmp = tmp2; - - if (0 < msg->cnt) { - /* indent another level for notify data */ - asprintf(&pfx, "%s\t", prefx); - - data = (orte_gpr_notify_data_t**)(msg->data)->addr; - for (i=0, j=0; j < msg->cnt && - i < (msg->data)->size; i++) { - if (NULL != data[i]) { - j++; - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_notify_data(&tmp2, pfx, (orte_gpr_notify_data_t*)data[i], ORTE_GPR_NOTIFY_DATA))) { - ORTE_ERROR_LOG(rc); - free(tmp); - return rc; - } - asprintf(&tmp3, "%s\n%s", tmp, tmp2); - free(tmp2); - tmp = tmp3; - } - } - } - - /* put results in final location */ - *output = tmp; - - return ORTE_SUCCESS; -} - -static void orte_gpr_base_quick_print(char **output, char *type_name, char *prefix, void *src, orte_std_cntr_t src_size) -{ - char *prefx; - uint8_t *ui8; - uint16_t *ui16; - uint32_t *ui32; -#ifdef HAVE_INT64_T - uint64_t *ui64; -#endif - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - switch(src_size) { - case 1: - ui8 = (uint8_t*)src; - asprintf(output, "%sData type: %s\tValue: %d", prefx, type_name, (int) *ui8); - break; - - case 2: - ui16 = (uint16_t*)src; - asprintf(output, "%sData type: %s\tValue: %d", prefx, type_name, 
(int) *ui16); - break; - - case 4: - ui32 = (uint32_t*)src; - asprintf(output, "%sData type: %s\tValue: %lu", prefx, type_name, (unsigned long) *ui32); - break; - -#ifdef HAVE_INT64_T - case 8: - ui64 = (uint64_t*)src; - asprintf(output, "%sData type: %s\tValue: %lu", prefx, type_name, (unsigned long) *ui64); - break; -#endif - - default: - return; - } - - return; -} diff --git a/orte/mca/gpr/base/data_type_support/gpr_data_type_release_fns.c b/orte/mca/gpr/base/data_type_support/gpr_data_type_release_fns.c deleted file mode 100644 index f3f5e5801b..0000000000 --- a/orte/mca/gpr/base/data_type_support/gpr_data_type_release_fns.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -/* - * STANDARD RELEASE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ -void orte_gpr_base_std_release(orte_data_value_t *value) -{ - free(value->data); - value->data = NULL; -} - -/* - * STANDARD OBJECT RELEASE FUNCTION - WORKS FOR EVERYTHING STRUCTURED - */ -void orte_gpr_base_std_obj_release(orte_data_value_t *value) -{ - OBJ_RELEASE(value->data); -} diff --git a/orte/mca/gpr/base/data_type_support/gpr_data_type_size_fns.c b/orte/mca/gpr/base/data_type_support/gpr_data_type_size_fns.c deleted file mode 100755 index a4089db064..0000000000 --- a/orte/mca/gpr/base/data_type_support/gpr_data_type_size_fns.c +++ /dev/null @@ -1,298 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -/* - * STANDARD SIZE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ -int orte_gpr_base_std_size(size_t *size, void *src, orte_data_type_t type) -{ - switch(type) { - case ORTE_GPR_CMD: - *size = sizeof(orte_gpr_cmd_flag_t); - break; - - case ORTE_GPR_SUBSCRIPTION_ID: - *size = sizeof(orte_gpr_subscription_id_t); - break; - - case ORTE_GPR_TRIGGER_ID: - *size = sizeof(orte_gpr_trigger_id_t); - break; - - case ORTE_GPR_NOTIFY_ACTION: - *size = sizeof(orte_gpr_notify_action_t); - break; - - case ORTE_GPR_TRIGGER_ACTION: - *size = sizeof(orte_gpr_trigger_action_t); - break; - - case ORTE_GPR_NOTIFY_MSG_TYPE: - *size = sizeof(orte_gpr_notify_msg_type_t); - break; - - case ORTE_GPR_ADDR_MODE: - *size = sizeof(orte_gpr_addr_mode_t); - break; - - default: - *size = 0; - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - return ORTE_SUCCESS; -} - -/* SIZE FUNCTIONS FOR COMPLEX TYPES */ - -/* KEYVAL */ -int orte_gpr_base_size_keyval(size_t *size, orte_gpr_keyval_t *src, orte_data_type_t type) -{ - size_t data_size; - int rc; - - /* account for the object itself */ - *size = sizeof(orte_gpr_keyval_t); - - /* if the src is NULL, then that's the only thing we return */ - if (NULL == src) return ORTE_SUCCESS; - - /*...and its payload */ - if (NULL != src->key) { - *size += strlen(src->key); - } - - /* size the data itself - use the appropriate size function here */ - if (ORTE_SUCCESS != (rc = orte_dss.size(&data_size, src->value, ORTE_DATA_VALUE))) { - ORTE_ERROR_LOG(rc); - *size = 0; - return rc; - } - *size += data_size; - - return ORTE_SUCCESS; -} - -/* VALUE */ -int orte_gpr_base_size_gpr_value(size_t *size, orte_gpr_value_t *src, orte_data_type_t type) -{ - 
orte_std_cntr_t i; - size_t data_size; - int rc; - - /* account for the object itself */ - *size = sizeof(orte_gpr_value_t); - - /* if the src is NULL, then that's the only thing we return */ - if (NULL == src) return ORTE_SUCCESS; - - /*...and its payload */ - if (NULL != src->segment) { - *size += strlen(src->segment); - } - - /* size the keyvals - use the appropriate size function here */ - if (0 < src->cnt) { - for (i=0; i < src->cnt; i++) { - *size += sizeof(orte_gpr_keyval_t*); /* account for size of object pointer */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_size_keyval(&data_size, src->keyvals[i], - ORTE_GPR_KEYVAL))) { - ORTE_ERROR_LOG(rc); - *size = 0; - return rc; - } - *size += data_size; - } - } - - /* size the tokens - use the appropriate size function here */ - if (0 < src->num_tokens) { - for (i=0; i < src->num_tokens; i++) { - *size += sizeof(char*); /* account for size of string pointer */ - if (NULL != src->tokens[i]) { - *size += strlen(src->tokens[i]); - } - } - } - - return ORTE_SUCCESS; -} - - -/* SUBSCRIPTION */ -int orte_gpr_base_size_subscription(size_t *size, orte_gpr_subscription_t *src, orte_data_type_t type) -{ - orte_std_cntr_t i; - size_t data_size; - int rc; - - /* account for the object itself */ - *size = sizeof(orte_gpr_subscription_t); - - /* if the src is NULL, then that's the only thing we return */ - if (NULL == src) return ORTE_SUCCESS; - - /*...and its payload */ - if (NULL != src->name) { - *size += strlen(src->name); - } - - /* size the values - use the appropriate size function here */ - if (0 < src->cnt) { - for (i=0; i < src->cnt; i++) { - *size += sizeof(orte_gpr_value_t*); /* account for object pointer */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_size_gpr_value(&data_size, src->values[i], ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - *size = 0; - return rc; - } - *size += data_size; - } - } - - return ORTE_SUCCESS; -} - -/* TRIGGER */ -int orte_gpr_base_size_trigger(size_t *size, orte_gpr_trigger_t *src, 
orte_data_type_t type) -{ - orte_std_cntr_t i; - size_t data_size; - int rc; - - /* account for the object itself */ - *size = sizeof(orte_gpr_trigger_t); - - /* if the src is NULL, then that's the only thing we return */ - if (NULL == src) return ORTE_SUCCESS; - - /*...and its payload */ - if (NULL != src->name) { - *size += strlen(src->name); - } - - /* size the values - use the appropriate size function here */ - if (0 < src->cnt) { - for (i=0; i < src->cnt; i++) { - *size += sizeof(orte_gpr_value_t*); /* account for object pointer */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_size_gpr_value(&data_size, src->values[i], ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - *size = 0; - return rc; - } - *size += data_size; - } - } - - return ORTE_SUCCESS; -} - -/* NOTIFY DATA */ -int orte_gpr_base_size_notify_data(size_t *size, orte_gpr_notify_data_t *src, orte_data_type_t type) -{ - orte_std_cntr_t j, k; - size_t data_size; - orte_gpr_value_t **val; - int rc; - - /* account for the object itself */ - *size = sizeof(orte_gpr_notify_data_t); - - /* if the src is NULL, then that's the only thing we return */ - if (NULL == src) return ORTE_SUCCESS; - - /*...and its payload */ - if (NULL != src->target) { - *size += strlen(src->target); - } - *size += sizeof(orte_pointer_array_t); /* account for size of pointer array object */ - *size += (src->values)->size * sizeof(void*); /* account for size of pointer array storage */ - - /* size the value entries */ - if (0 < src->cnt) { - val = (orte_gpr_value_t**)(src->values)->addr; - for (j=0, k=0; j < src->cnt && - k < (src->values)->size; k++) { - if (NULL != val[k]) { - j++; - /* account for size of value */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_size_gpr_value(&data_size, val[k], ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - *size = 0; - return rc; - } - *size += data_size; - } - } - } - - return ORTE_SUCCESS; -} - -/* NOTIFY MSG */ -int orte_gpr_base_size_notify_msg(size_t *size, orte_gpr_notify_message_t *src, orte_data_type_t 
type) -{ - orte_std_cntr_t j, k; - size_t data_size; - orte_gpr_notify_data_t **val; - int rc; - - /* account for the object itself */ - *size = sizeof(orte_gpr_notify_message_t); - - /* if the src is NULL, then that's the only thing we return */ - if (NULL == src) return ORTE_SUCCESS; - - /*...and its payload */ - if (NULL != src->target) { - *size += strlen(src->target); - } - *size += sizeof(orte_pointer_array_t); /* account for size of pointer array object */ - *size += (src->data)->size * sizeof(void*); /* account for size of pointer array storage */ - - /* size the notify data entries */ - if (0 < src->cnt) { - val = (orte_gpr_notify_data_t**)(src->data)->addr; - for (j=0, k=0; j < src->cnt && - k < (src->data)->size; k++) { - if (NULL != val[k]) { - j++; - /* account for size of notify data */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_size_notify_data(&data_size, val[k], ORTE_GPR_NOTIFY_DATA))) { - ORTE_ERROR_LOG(rc); - *size = 0; - return rc; - } - *size += data_size; - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/data_type_support/gpr_data_type_unpacking_fns.c b/orte/mca/gpr/base/data_type_support/gpr_data_type_unpacking_fns.c deleted file mode 100644 index c47ff3c33d..0000000000 --- a/orte/mca/gpr/base/data_type_support/gpr_data_type_unpacking_fns.c +++ /dev/null @@ -1,576 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "opal/util/trace.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss_internal.h" - -#include "orte/mca/gpr/base/base.h" - -/* - * GPR CMD - */ -int orte_gpr_base_unpack_cmd(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_CMD_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * SUBSCRIPTION ID - */ -int orte_gpr_base_unpack_subscription_id(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_SUBSCRIPTION_ID_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * TRIGGER ID - */ -int orte_gpr_base_unpack_trigger_id(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_TRIGGER_ID_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * NOTIFY ACTION - */ -int orte_gpr_base_unpack_notify_action(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_NOTIFY_ACTION_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * TRIGGER ACTION - */ -int orte_gpr_base_unpack_trigger_action(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_TRIGGER_ACTION_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * NOTIFY MSG 
TYPE - */ -int orte_gpr_base_unpack_notify_msg_type(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_NOTIFY_MSG_TYPE_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * ADDR MODE - */ -int orte_gpr_base_unpack_addr_mode(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - - OPAL_TRACE(4); - - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_ADDR_MODE_T))) { - ORTE_ERROR_LOG(rc); - } - - return rc; -} - -/* - * KEYVAL - */ -int orte_gpr_base_unpack_keyval(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_keyval_t **keyval; - orte_std_cntr_t i, max_n; - - OPAL_TRACE(4); - - /* unpack into an array of keyval objects */ - keyval = (orte_gpr_keyval_t**) dest; - for (i=0; i < *num_vals; i++) { - /* allocate the memory storage */ - keyval[i] = OBJ_NEW(orte_gpr_keyval_t); - if (NULL == keyval[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* unpack the key */ - max_n=1; - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(keyval[i]->key), - &max_n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the data value */ - max_n=1; - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(keyval[i]->value), - &max_n, ORTE_DATA_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - return ORTE_SUCCESS; -} - -/* - * VALUE - */ -int orte_gpr_base_unpack_value(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_value_t **values; - orte_std_cntr_t i, max_n=1; - - OPAL_TRACE(4); - - /* unpack into array of value objects */ - values = (orte_gpr_value_t**) dest; - for (i=0; i < *num_vals; i++) { - /* create the value object */ - values[i] = 
OBJ_NEW(orte_gpr_value_t); - if (NULL == values[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* unpack the address mode */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(values[i]->addr_mode), - &max_n, ORTE_GPR_ADDR_MODE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the segment name */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(values[i]->segment), - &max_n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* get the number of tokens */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(values[i]->num_tokens), - &max_n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there are tokens, allocate the required space for the char * pointers */ - if (0 < values[i]->num_tokens) { - values[i]->tokens = (char **)malloc(values[i]->num_tokens * sizeof(char*)); - if (NULL == values[i]->tokens) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* and unpack them */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, values[i]->tokens, - &(values[i]->num_tokens), ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* get the number of keyval pairs */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(values[i]->cnt), - &max_n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there are keyvals, allocate the required space for the keyval object pointers */ - if(0 < values[i]->cnt) { - values[i]->keyvals = (orte_gpr_keyval_t**)malloc(values[i]->cnt * sizeof(orte_gpr_keyval_t*)); - if (NULL == values[i]->keyvals) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* unpack the keyval pairs */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, values[i]->keyvals, - &(values[i]->cnt), ORTE_GPR_KEYVAL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - - return ORTE_SUCCESS; -} - -/* - * SUBSCRIPTION - */ -int 
orte_gpr_base_unpack_subscription(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_subscription_t **subs; - orte_std_cntr_t i, max_n=1; - - OPAL_TRACE(4); - - /* unpack into array of subscription objects */ - subs = (orte_gpr_subscription_t**) dest; - for (i=0; i < *num_vals; i++) { - /* create the subscription object */ - subs[i] = OBJ_NEW(orte_gpr_subscription_t); - if (NULL == subs[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* unpack the subscription name */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(subs[i]->name), - &max_n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the subscription id */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(subs[i]->id), - &max_n, ORTE_GPR_SUBSCRIPTION_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the subscription action */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(subs[i]->action), - &max_n, ORTE_GPR_NOTIFY_ACTION))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* get the number of values */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(subs[i]->cnt), - &max_n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there are values, allocate the required space for the value pointers */ - if (0 < subs[i]->cnt) { - subs[i]->values = (orte_gpr_value_t**)malloc(subs[i]->cnt * sizeof(orte_gpr_value_t*)); - if (NULL == subs[i]->values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* and unpack them */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, subs[i]->values, - &(subs[i]->cnt), ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* the pointer fields for cb_func and user_tag were NOT packed - * so ignore them here as well - */ - } - - return ORTE_SUCCESS; -} - -/* - * TRIGGER - */ -int orte_gpr_base_unpack_trigger(orte_buffer_t 
*buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_trigger_t **trigs; - orte_std_cntr_t i, max_n=1; - - OPAL_TRACE(4); - - /* unpack into array of trigger objects */ - trigs = (orte_gpr_trigger_t**) dest; - for (i=0; i < *num_vals; i++) { - /* create the trigger object */ - trigs[i] = OBJ_NEW(orte_gpr_trigger_t); - if (NULL == trigs[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* unpack the trigger name */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(trigs[i]->name), - &max_n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the trigger id */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(trigs[i]->id), - &max_n, ORTE_GPR_TRIGGER_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the trigger action */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(trigs[i]->action), - &max_n, ORTE_GPR_TRIGGER_ACTION))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* get the number of values */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(trigs[i]->cnt), - &max_n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there are values, allocate the required space for the value pointers */ - if (0 < trigs[i]->cnt) { - trigs[i]->values = (orte_gpr_value_t**)malloc(trigs[i]->cnt * sizeof(orte_gpr_value_t*)); - if (NULL == trigs[i]->values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* and unpack them */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, trigs[i]->values, - &(trigs[i]->cnt), ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - - return ORTE_SUCCESS; -} - -/* - * NOTIFY DATA - */ -int orte_gpr_base_unpack_notify_data(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_notify_data_t **data; - orte_gpr_value_t **values; - orte_std_cntr_t i, j, max_n=1; 
- - OPAL_TRACE(4); - - /* unpack into array of notify_data objects */ - data = (orte_gpr_notify_data_t**) dest; - - for (i=0; i < *num_vals; i++) { - /* create the data object */ - data[i] = OBJ_NEW(orte_gpr_notify_data_t); - if (NULL == data[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* unpack the subscription name */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(data[i]->target), - &max_n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the subscription number */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(data[i]->id), - &max_n, ORTE_GPR_SUBSCRIPTION_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the remove flag */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(data[i]->remove), - &max_n, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* get the number of values */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(data[i]->cnt), - &max_n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there are values, unpack them to the value array */ - if (0 < data[i]->cnt) { - if (ORTE_SUCCESS != (rc = orte_pointer_array_set_size(data[i]->values, data[i]->cnt))) { - ORTE_ERROR_LOG(rc); - return rc; - } - values = (orte_gpr_value_t**)(data[i]->values)->addr; - for (j=0; j < data[i]->cnt; j++) { - max_n = 1; - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(values[j]), - &max_n, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - - return ORTE_SUCCESS; -} - - -/* - * NOTIFY MSG - */ -int orte_gpr_base_unpack_notify_msg(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - orte_gpr_notify_message_t **msg; - orte_gpr_notify_data_t **data; - orte_std_cntr_t i, j, max_n=1; - - OPAL_TRACE(4); - - /* unpack into array of notify_data objects */ - msg = (orte_gpr_notify_message_t**) dest; - - for (i=0; i < *num_vals; i++) { - /* create 
the data object */ - msg[i] = OBJ_NEW(orte_gpr_notify_message_t); - if (NULL == msg[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* unpack the message type */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(msg[i]->msg_type), - &max_n, ORTE_GPR_NOTIFY_MSG_TYPE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the trigger name */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(msg[i]->target), - &max_n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the trigger number */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(msg[i]->id), - &max_n, ORTE_GPR_TRIGGER_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the remove flag */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(msg[i]->remove), - &max_n, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* get the number of datagrams */ - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(msg[i]->cnt), - &max_n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there are datagrams, unpack them to the data array */ - if (0 < msg[i]->cnt) { - if (ORTE_SUCCESS != (rc = orte_pointer_array_set_size(msg[i]->data, msg[i]->cnt))) { - ORTE_ERROR_LOG(rc); - return rc; - } - data = (orte_gpr_notify_data_t**)(msg[i]->data)->addr; - for (j=0; j < msg[i]->cnt; j++) { - max_n = 1; - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(data[j]), - &max_n, ORTE_GPR_NOTIFY_DATA))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/gpr_base_close.c b/orte/mca/gpr/base/gpr_base_close.c deleted file mode 100644 index 1e21bd2221..0000000000 --- a/orte/mca/gpr/base/gpr_base_close.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" - -#include "orte/mca/gpr/base/base.h" - - -int orte_gpr_base_close(void) -{ - OPAL_TRACE(5); - - /* If we have a selected component and module, then finalize it */ - - if (orte_gpr_base_selected) { - orte_gpr_base_selected_component.gpr_finalize(); - } - - /* Close all remaining available components (may be one if this is a - OMPI RTE program, or [possibly] multiple if this is ompi_info) */ - - mca_base_components_close(orte_gpr_base_output, - &orte_gpr_base_components_available, NULL); - - /* All done */ - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/gpr_base_create_value_keyval.c b/orte/mca/gpr/base/gpr_base_create_value_keyval.c deleted file mode 100755 index e5231ef293..0000000000 --- a/orte/mca/gpr/base/gpr_base_create_value_keyval.c +++ /dev/null @@ -1,129 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file - * - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - - -int orte_gpr_base_create_value(orte_gpr_value_t **value, - orte_gpr_addr_mode_t addr_mode, - char *segment, - orte_std_cntr_t cnt, /**< Number of keyval objects */ - orte_std_cntr_t num_tokens) -{ - orte_gpr_value_t *val; - - OPAL_TRACE(1); - - *value = OBJ_NEW(orte_gpr_value_t); - if (NULL == *value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - val = *value; - - /* get space for the specified number of keyvals */ - if (0 < cnt) { - val->keyvals = (orte_gpr_keyval_t**)malloc(cnt * sizeof(orte_gpr_keyval_t*)); - if (NULL == val->keyvals) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(val); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* initialize it to NULL */ - memset(val->keyvals, 0, cnt * sizeof(orte_gpr_keyval_t*)); - } - - /* get space for the specified number of tokens */ - if (0 < num_tokens) { - val->tokens = (char**)malloc((1+num_tokens) * sizeof(char*)); - if (NULL == val->tokens) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(val); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* initialize it to NULL and ensure that the array is NULL terminated */ - memset(val->tokens, 0, (1+num_tokens) * sizeof(char*)); - } - - val->addr_mode = addr_mode; - if (NULL != segment) { - val->segment = strdup(segment); - } - val->cnt = cnt; - val->num_tokens = num_tokens; - - return ORTE_SUCCESS; -} - - -int orte_gpr_base_create_keyval(orte_gpr_keyval_t **keyval, - char *key, - orte_data_type_t type, - void *data) -{ - orte_gpr_keyval_t *kv; - int rc; - - OPAL_TRACE(1); - - *keyval = OBJ_NEW(orte_gpr_keyval_t); - if (NULL == *keyval) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return 
ORTE_ERR_OUT_OF_RESOURCE; - } - kv = *keyval; - - /* if the type is ORTE_UNDEF, then we don't actually want to create a data_value. This - * is the case, for example, when we are doing subscriptions as the keyval is used simply - * to transmit the key - the data_value field must remain NULL - */ - if (ORTE_UNDEF != type) { - kv->value = OBJ_NEW(orte_data_value_t); - if (NULL == kv->value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(kv); - return ORTE_ERR_OUT_OF_RESOURCE; - } - kv->value->type = type; - if (NULL != data) { - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(kv->value->data), data, type))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(kv); - return rc; - } - } - } - - if (NULL != key) { - kv->key = strdup(key); - } - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/gpr/base/gpr_base_open.c b/orte/mca/gpr/base/gpr_base_open.c deleted file mode 100644 index 6957a43616..0000000000 --- a/orte/mca/gpr/base/gpr_base_open.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - - - -/* - * The following file was created by configure. 
It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_module_t struct. - */ - -#include "orte/mca/gpr/base/static-components.h" - -/* JMS: This is only INT_MAX until bug 1345 is fixed, because this - value is used to set an MAC parameter, which can [currently] only - take an int. */ -#define ORTE_GPR_ARRAY_MAX_SIZE INT_MAX -#define ORTE_GPR_ARRAY_BLOCK_SIZE 16 - - - -/* - * globals - */ - -/** KEYVAL **/ -/* constructor - used to initialize state of keyval instance */ -static void orte_gpr_keyval_construct(orte_gpr_keyval_t* keyval) -{ - keyval->key = NULL; - keyval->value = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_keyval_destructor(orte_gpr_keyval_t* keyval) -{ - - if (NULL != keyval->key) free(keyval->key); - if (NULL != keyval->value) OBJ_RELEASE(keyval->value); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_keyval_t, /* type name */ - opal_list_item_t, /* parent "class" name */ - orte_gpr_keyval_construct, /* constructor */ - orte_gpr_keyval_destructor); /* destructor */ - - - -/** VALUE **/ -/* constructor - used to initialize state of registry value instance */ -static void orte_gpr_value_construct(orte_gpr_value_t* reg_val) -{ - reg_val->addr_mode = 0; - reg_val->segment = NULL; - reg_val->cnt = 0; - reg_val->keyvals = NULL; - reg_val->num_tokens = 0; - reg_val->tokens = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_value_destructor(orte_gpr_value_t* reg_val) -{ - char **tokens; - orte_std_cntr_t i; - - if (NULL != reg_val->segment) free(reg_val->segment); - - if (0 < reg_val->cnt && NULL != reg_val->keyvals) { - for (i=0; i < reg_val->cnt; i++) { - if (NULL != reg_val->keyvals[i]) - OBJ_RELEASE(reg_val->keyvals[i]); - } - free(reg_val->keyvals); - } - - if (0 < reg_val->num_tokens && NULL != reg_val->tokens) { - tokens = reg_val->tokens; - for (i=0; i < 
reg_val->num_tokens; i++) { - if(NULL != tokens[i]) - free(tokens[i]); - } - free(tokens); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_value_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_value_construct, /* constructor */ - orte_gpr_value_destructor); /* destructor */ - - -/** NOTIFY DATA **/ -/* constructor - used to initialize state of registry value instance */ -static void orte_gpr_notify_data_construct(orte_gpr_notify_data_t* ptr) -{ - ptr->target = NULL; - ptr->id = ORTE_GPR_SUBSCRIPTION_ID_MAX; - ptr->remove = false; - ptr->cnt = 0; - orte_pointer_array_init(&(ptr->values), (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size); - -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_notify_data_destructor(orte_gpr_notify_data_t* ptr) -{ - orte_std_cntr_t i, j; - orte_gpr_value_t **values; - - if (NULL != ptr->target) free(ptr->target); - - if (NULL != ptr->values) { - values = (orte_gpr_value_t**)(ptr->values)->addr; - for (i=0, j=0; j < ptr->cnt && - i < (ptr->values)->size; i++) { - if (NULL != values[i]) { - j++; - OBJ_RELEASE(values[i]); - } - } - OBJ_RELEASE(ptr->values); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_notify_data_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_notify_data_construct, /* constructor */ - orte_gpr_notify_data_destructor); /* destructor */ - - -/** SUBSCRIPTION **/ -/* constructor - used to initialize state of registry subscription instance */ -static void orte_gpr_subscription_construct(orte_gpr_subscription_t* sub) -{ - sub->name = NULL; - sub->id = ORTE_GPR_SUBSCRIPTION_ID_MAX; - sub->action = 0; - sub->cnt = 0; - sub->values = NULL; - sub->cbfunc = NULL; - sub->user_tag = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void 
orte_gpr_subscription_destructor(orte_gpr_subscription_t* sub) -{ - orte_std_cntr_t i; - - if (NULL != sub->name) free(sub->name); - - if (0 < sub->cnt && NULL != sub->values) { - for (i=0; i < sub->cnt; i++) { - OBJ_RELEASE(sub->values[i]); - } - free(sub->values); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_subscription_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_subscription_construct, /* constructor */ - orte_gpr_subscription_destructor); /* destructor */ - - -/** TRIGGER **/ -/* constructor - used to initialize state of registry subscription instance */ -static void orte_gpr_trigger_construct(orte_gpr_trigger_t* trig) -{ - trig->name = NULL; - trig->id = ORTE_GPR_TRIGGER_ID_MAX; - trig->action = 0; - trig->cnt = 0; - trig->values = NULL; - trig->cbfunc = NULL; - trig->user_tag = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_trigger_destructor(orte_gpr_trigger_t* trig) -{ - orte_std_cntr_t i; - - if (NULL != trig->name) free(trig->name); - - if (0 < trig->cnt && NULL != trig->values) { - for (i=0; i < trig->cnt; i++) OBJ_RELEASE(trig->values[i]); - free(trig->values); - } - -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_trigger_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_trigger_construct, /* constructor */ - orte_gpr_trigger_destructor); /* destructor */ - - -/** NOTIFY MESSAGE */ -/* constructor - used to initialize notify message instance */ -static void orte_gpr_notify_message_construct(orte_gpr_notify_message_t* msg) -{ - msg->msg_type = 0; - msg->target = NULL; - msg->id = ORTE_GPR_TRIGGER_ID_MAX; - msg->remove = false; - msg->cnt = 0; - orte_pointer_array_init(&(msg->data), (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size); -} - -/* destructor - used to free any resources held by instance */ -static 
void orte_gpr_notify_message_destructor(orte_gpr_notify_message_t* msg) -{ - orte_std_cntr_t i, j; - orte_gpr_notify_data_t **data; - - if (NULL != msg->target) free(msg->target); - - if (NULL != msg->data) { - data = (orte_gpr_notify_data_t**)(msg->data)->addr; - for (i=0, j=0; j < msg->cnt && - i < (msg->data)->size; i++) { - if (NULL != data[i]) { - j++; - OBJ_RELEASE(data[i]); - } - } - OBJ_RELEASE(msg->data); - } - -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_notify_message_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_notify_message_construct, /* constructor */ - orte_gpr_notify_message_destructor); /* destructor */ - - -/* - * Global variables - */ -int orte_gpr_base_output = -1; -size_t orte_gpr_array_max_size, orte_gpr_array_block_size; -orte_gpr_base_module_t orte_gpr; -bool orte_gpr_base_selected = false; -opal_list_t orte_gpr_base_components_available; -mca_gpr_base_component_t orte_gpr_base_selected_component; -opal_mutex_t orte_gpr_mutex; - - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -int orte_gpr_base_open(void) -{ - int param, value, rc, id; - orte_data_type_t tmp; - opal_output_stream_t kill_prefix; - - OPAL_TRACE(5); - - /* Debugging / verbose output */ - /** setup the structure to kill the blasted prefix that opal_output - * now defaults to including so the output can be legible again! 
- */ - OBJ_CONSTRUCT(&kill_prefix, opal_output_stream_t); - kill_prefix.lds_want_stderr = true; - kill_prefix.lds_prefix = NULL; - - param = mca_base_param_reg_int_name("gpr", "base_verbose", - "Verbosity level for the gpr framework", - false, false, 0, &value); - if (value != 0) { - kill_prefix.lds_verbose_level = value; - } - orte_gpr_base_output = opal_output_open(&kill_prefix); - OBJ_DESTRUCT(&kill_prefix); - - id = mca_base_param_register_int("gpr", "base", "maxsize", NULL, - ORTE_GPR_ARRAY_MAX_SIZE); - mca_base_param_lookup_int(id, ¶m); - orte_gpr_array_max_size = (size_t)param; - - id = mca_base_param_register_int("gpr", "base", "blocksize", NULL, - ORTE_GPR_ARRAY_BLOCK_SIZE); - mca_base_param_lookup_int(id, ¶m); - orte_gpr_array_block_size = (size_t)param; - - /* register the base data types with the DPS */ - tmp = ORTE_GPR_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_cmd, - orte_gpr_base_unpack_cmd, - (orte_dss_copy_fn_t)orte_gpr_base_copy_cmd, - (orte_dss_compare_fn_t)orte_gpr_base_compare_cmd, - (orte_dss_size_fn_t)orte_gpr_base_std_size, - (orte_dss_print_fn_t)orte_gpr_base_std_print, - (orte_dss_release_fn_t)orte_gpr_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_GPR_CMD", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_SUBSCRIPTION_ID; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_subscription_id, - orte_gpr_base_unpack_subscription_id, - (orte_dss_copy_fn_t)orte_gpr_base_copy_subscription_id, - (orte_dss_compare_fn_t)orte_gpr_base_compare_subscription_id, - (orte_dss_size_fn_t)orte_gpr_base_std_size, - (orte_dss_print_fn_t)orte_gpr_base_std_print, - (orte_dss_release_fn_t)orte_gpr_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_GPR_SUBSCRIPTION_ID", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_TRIGGER_ID; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_trigger_id, - orte_gpr_base_unpack_trigger_id, - 
(orte_dss_copy_fn_t)orte_gpr_base_copy_trigger_id, - (orte_dss_compare_fn_t)orte_gpr_base_compare_trigger_id, - (orte_dss_size_fn_t)orte_gpr_base_std_size, - (orte_dss_print_fn_t)orte_gpr_base_std_print, - (orte_dss_release_fn_t)orte_gpr_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_GPR_TRIGGER_ID", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_NOTIFY_ACTION; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_notify_action, - orte_gpr_base_unpack_notify_action, - (orte_dss_copy_fn_t)orte_gpr_base_copy_notify_action, - (orte_dss_compare_fn_t)orte_gpr_base_compare_notify_action, - (orte_dss_size_fn_t)orte_gpr_base_std_size, - (orte_dss_print_fn_t)orte_gpr_base_std_print, - (orte_dss_release_fn_t)orte_gpr_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_GPR_NOTIFY_ACTION", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_TRIGGER_ACTION; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_trigger_action, - orte_gpr_base_unpack_trigger_action, - (orte_dss_copy_fn_t)orte_gpr_base_copy_trigger_action, - (orte_dss_compare_fn_t)orte_gpr_base_compare_trigger_action, - (orte_dss_size_fn_t)orte_gpr_base_std_size, - (orte_dss_print_fn_t)orte_gpr_base_std_print, - (orte_dss_release_fn_t)orte_gpr_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_GPR_TRIGGER_ACTION", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_NOTIFY_MSG_TYPE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_notify_msg_type, - orte_gpr_base_unpack_notify_msg_type, - (orte_dss_copy_fn_t)orte_gpr_base_copy_notify_msg_type, - (orte_dss_compare_fn_t)orte_gpr_base_compare_notify_msg_type, - (orte_dss_size_fn_t)orte_gpr_base_std_size, - (orte_dss_print_fn_t)orte_gpr_base_std_print, - (orte_dss_release_fn_t)orte_gpr_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_GPR_NOTIFY_MSG_TYPE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_ADDR_MODE; - if (ORTE_SUCCESS != (rc = 
orte_dss.register_type(orte_gpr_base_pack_addr_mode, - orte_gpr_base_unpack_addr_mode, - (orte_dss_copy_fn_t)orte_gpr_base_copy_addr_mode, - (orte_dss_compare_fn_t)orte_gpr_base_compare_addr_mode, - (orte_dss_size_fn_t)orte_gpr_base_std_size, - (orte_dss_print_fn_t)orte_gpr_base_std_print, - (orte_dss_release_fn_t)orte_gpr_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_GPR_ADDR_MODE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_KEYVAL; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_keyval, - orte_gpr_base_unpack_keyval, - (orte_dss_copy_fn_t)orte_gpr_base_copy_keyval, - (orte_dss_compare_fn_t)orte_gpr_base_compare_keyval, - (orte_dss_size_fn_t)orte_gpr_base_size_keyval, - (orte_dss_print_fn_t)orte_gpr_base_print_keyval, - (orte_dss_release_fn_t)orte_gpr_base_std_obj_release, - ORTE_DSS_STRUCTURED, - "ORTE_GPR_KEYVAL", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_VALUE; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_value, - orte_gpr_base_unpack_value, - (orte_dss_copy_fn_t)orte_gpr_base_copy_gpr_value, - (orte_dss_compare_fn_t)orte_gpr_base_compare_gpr_value, - (orte_dss_size_fn_t)orte_gpr_base_size_gpr_value, - (orte_dss_print_fn_t)orte_gpr_base_print_gpr_value, - (orte_dss_release_fn_t)orte_gpr_base_std_obj_release, - ORTE_DSS_STRUCTURED, - "ORTE_GPR_VALUE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_SUBSCRIPTION; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_subscription, - orte_gpr_base_unpack_subscription, - (orte_dss_copy_fn_t)orte_gpr_base_copy_subscription, - (orte_dss_compare_fn_t)orte_gpr_base_compare_subscription, - (orte_dss_size_fn_t)orte_gpr_base_size_subscription, - (orte_dss_print_fn_t)orte_gpr_base_print_subscription, - (orte_dss_release_fn_t)orte_gpr_base_std_obj_release, - ORTE_DSS_STRUCTURED, - "ORTE_GPR_SUBSCRIPTION", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_TRIGGER; - if 
(ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_trigger, - orte_gpr_base_unpack_trigger, - (orte_dss_copy_fn_t)orte_gpr_base_copy_trigger, - (orte_dss_compare_fn_t)orte_gpr_base_compare_trigger, - (orte_dss_size_fn_t)orte_gpr_base_size_trigger, - (orte_dss_print_fn_t)orte_gpr_base_print_trigger, - (orte_dss_release_fn_t)orte_gpr_base_std_obj_release, - ORTE_DSS_STRUCTURED, - "ORTE_GPR_TRIGGER", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_NOTIFY_DATA; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_notify_data, - orte_gpr_base_unpack_notify_data, - (orte_dss_copy_fn_t)orte_gpr_base_copy_notify_data, - (orte_dss_compare_fn_t)orte_gpr_base_compare_notify_data, - (orte_dss_size_fn_t)orte_gpr_base_size_notify_data, - (orte_dss_print_fn_t)orte_gpr_base_print_notify_data, - (orte_dss_release_fn_t)orte_gpr_base_std_obj_release, - ORTE_DSS_STRUCTURED, - "ORTE_GPR_NOTIFY_DATA", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_GPR_NOTIFY_MSG; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_gpr_base_pack_notify_msg, - orte_gpr_base_unpack_notify_msg, - (orte_dss_copy_fn_t)orte_gpr_base_copy_notify_msg, - (orte_dss_compare_fn_t)orte_gpr_base_compare_notify_msg, - (orte_dss_size_fn_t)orte_gpr_base_size_notify_msg, - (orte_dss_print_fn_t)orte_gpr_base_print_notify_msg, - (orte_dss_release_fn_t)orte_gpr_base_std_obj_release, - ORTE_DSS_STRUCTURED, - "ORTE_GPR_NOTIFY_MSG", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* Open up all available components */ - if (ORTE_SUCCESS != - mca_base_components_open("gpr", - orte_gpr_base_output, - mca_gpr_base_static_components, - &orte_gpr_base_components_available, true)) { - return ORTE_ERROR; - } - - /* All done */ - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/gpr_base_select.c b/orte/mca/gpr/base/gpr_base_select.c deleted file mode 100644 index 20cfc7bf02..0000000000 --- a/orte/mca/gpr/base/gpr_base_select.c +++ /dev/null @@ -1,101 
+0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" - -#include "orte/mca/gpr/base/base.h" - - -/** - * Function for selecting one component from all those that are - * available. - */ -int orte_gpr_base_select(void) -{ - opal_list_item_t *item; - mca_base_component_list_item_t *cli; - mca_gpr_base_component_t *component, *best_component = NULL; - orte_gpr_base_module_t *module, *best_module = NULL; - bool multi, hidden; - int priority, best_priority = -1; - - OPAL_TRACE(5); - - /* Iterate through all the available components */ - - for (item = opal_list_get_first(&orte_gpr_base_components_available); - item != opal_list_get_end(&orte_gpr_base_components_available); - item = opal_list_get_next(item)) { - cli = (mca_base_component_list_item_t *) item; - component = (mca_gpr_base_component_t *) cli->cli_component; - - /* Call the component's init function and see if it wants to be - selected */ - module = component->gpr_init(&multi, &hidden, &priority); - - /* If we got a non-NULL module back, then the component wants to - be selected. 
So save its multi/hidden values and save the - module with the highest priority */ - - if (NULL != module) { - /* If this is the best one, save it */ - if (priority > best_priority) { - - /* If there was a previous best one, finalize */ - - if (NULL != best_component) { - best_component->gpr_finalize(); - } - - /* Save the new best one */ - best_module = module; - best_component = component; - - /* update the best priority */ - best_priority = priority; - } - - /* If it's not the best one, finalize it */ - - else { - component->gpr_finalize(); - } - } - } - - /* If we didn't find one to select, barf */ - - if (NULL == best_component) { - return ORTE_ERROR; - } - - /* We have happiness -- save the component and module for later - usage */ - - orte_gpr = *best_module; - orte_gpr_base_selected_component = *best_component; - orte_gpr_base_selected = true; - - /* all done */ - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/gpr_base_simplified_put.c b/orte/mca/gpr/base/gpr_base_simplified_put.c deleted file mode 100644 index be1df05391..0000000000 --- a/orte/mca/gpr/base/gpr_base_simplified_put.c +++ /dev/null @@ -1,125 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file - * - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - - -int orte_gpr_base_put_1(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - char *key, orte_data_value_t *data_value) -{ - orte_gpr_value_t *values; - orte_gpr_value_t value = ORTE_GPR_VALUE_EMPTY; - orte_gpr_keyval_t *keyval; - orte_std_cntr_t i; - int rc; - - OPAL_TRACE(1); - - value.addr_mode = addr_mode; - value.segment = segment; - value.cnt = 1; - value.keyvals = &keyval; - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_keyval(&keyval, key, - data_value->type, data_value->data))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - value.tokens = tokens; - /* must count the number of tokens */ - value.num_tokens = 0; - if (NULL != tokens) { - for (i=0; NULL != tokens[i]; i++) { - (value.num_tokens)++; - } - } - values = &value; - - /* put the value on the registry */ - if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &values))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(keyval); - return rc; - } - - /* cleanup */ - OBJ_RELEASE(keyval); - - return ORTE_SUCCESS; -} - - -int orte_gpr_base_put_N(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - orte_std_cntr_t n, char **keys, - orte_data_value_t **data_values) -{ - orte_gpr_value_t *value; - orte_std_cntr_t i, num_tokens; - int rc; - - OPAL_TRACE(1); - - /* must count the number of tokens */ - num_tokens = 0; - if (NULL != tokens) { - for (i=0; NULL != tokens[i]; i++) { - num_tokens++; - } - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&value, addr_mode, segment, n, num_tokens))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - for (i=0; i < n; i++) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_keyval(&(value->keyvals[i]), keys[i], data_values[i]->type, 
data_values[i]->data))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - } - - for (i=0; i < value->num_tokens; i++) { - value->tokens[i] = strdup(tokens[i]); - } - - /* put the value on the registry */ - if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) { - ORTE_ERROR_LOG(rc); - } - - /* clean up memory */ - OBJ_RELEASE(value); - - return rc; -} - diff --git a/orte/mca/gpr/base/gpr_base_simplified_subscribe.c b/orte/mca/gpr/base/gpr_base_simplified_subscribe.c deleted file mode 100644 index c53c5a3024..0000000000 --- a/orte/mca/gpr/base/gpr_base_simplified_subscribe.c +++ /dev/null @@ -1,380 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file - * - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_subscribe_1(orte_gpr_subscription_id_t *id, - char *trig_name, - char *sub_name, - orte_gpr_notify_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - char *key, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag) -{ - orte_gpr_value_t *values; - orte_gpr_keyval_t *keyval; - orte_gpr_value_t value = ORTE_GPR_VALUE_EMPTY; - orte_gpr_subscription_t *subs; - orte_gpr_subscription_t sub = ORTE_GPR_SUBSCRIPTION_EMPTY; - orte_gpr_trigger_t *trigs; - orte_gpr_trigger_t trig = ORTE_GPR_TRIGGER_EMPTY; - orte_std_cntr_t i; - int rc; - - OPAL_TRACE(1); - - /* assemble the subscription object */ - subs = ⊂ - sub.name = sub_name; - sub.action = action; - sub.cnt = 1; - values = &value; - sub.values = &values; - sub.cbfunc = cbfunc; - sub.user_tag = user_tag; - - value.addr_mode = addr_mode; - value.segment = segment; - value.cnt = 1; - value.keyvals = &keyval; - - value.tokens = tokens; - /* must count the number of tokens */ - value.num_tokens = 0; - if (NULL != tokens) { - for (i=0; NULL != tokens[i]; i++) { - (value.num_tokens)++; - } - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_keyval(&keyval, key, - ORTE_UNDEF, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* send the subscription */ - if (NULL == trig_name) { /* no trigger provided */ - if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(1, &subs, 0, NULL))) { - ORTE_ERROR_LOG(rc); - } - - } else { - trigs = &trig; - trig.name = trig_name; - if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(1, &subs, 1, &trigs))) { - ORTE_ERROR_LOG(rc); - } - - } - - /* cleanup */ - OBJ_RELEASE(keyval); - - /* return the subscription id */ - *id = sub.id; 
- - return rc; -} - - -int orte_gpr_base_subscribe_N(orte_gpr_subscription_id_t *id, - char *trig_name, - char *sub_name, - orte_gpr_notify_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag) -{ - orte_gpr_subscription_t *sub; - orte_gpr_trigger_t *trig; - orte_std_cntr_t i, num_tokens; - int rc; - - OPAL_TRACE(1); - - /* assemble the subscription object */ - sub = OBJ_NEW(orte_gpr_subscription_t); - if (NULL == sub) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (NULL != sub_name) { - sub->name = strdup(sub_name); - } - sub->action = action; - sub->cnt = 1; - sub->cbfunc = cbfunc; - sub->user_tag = user_tag; - - /* must count the number of tokens */ - num_tokens = 0; - if (NULL != tokens) { - for (i=0; NULL != tokens[i]; i++) { - num_tokens++; - } - } - - /* create the value object */ - sub->values = (orte_gpr_value_t**)malloc(sizeof(orte_gpr_value_t*)); - if (NULL == sub->values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(sub); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&(sub->values[0]), addr_mode, segment, n, num_tokens))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(sub); - return rc; - } - - for (i=0; i < n; i++) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_keyval(&(sub->values[0]->keyvals[i]), keys[i], ORTE_UNDEF, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(sub); - return rc; - } - } - - /* copy the tokens */ - for (i=0; i < sub->values[0]->num_tokens; i++) { - sub->values[0]->tokens[i] = strdup(tokens[i]); - } - - /* send the subscription */ - if (NULL == trig_name) { /* no trigger provided */ - if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(1, &sub, 0, NULL))) { - ORTE_ERROR_LOG(rc); - } - - } else { - trig = OBJ_NEW(orte_gpr_trigger_t); - if (NULL == trig) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(sub); - 
return ORTE_ERR_OUT_OF_RESOURCE; - } - trig->name = strdup(trig_name); - if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(1, &sub, 1, &trig))) { - ORTE_ERROR_LOG(rc); - } - OBJ_RELEASE(trig); - } - - /* return the subscription id */ - *id = sub->id; - - /* clean up memory */ - OBJ_RELEASE(sub); - - return rc; -} - - -int orte_gpr_base_define_trigger(orte_gpr_trigger_id_t *id, - char *trig_name, - orte_gpr_trigger_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_gpr_trigger_cb_fn_t cbfunc, - void *user_tag) -{ - orte_gpr_trigger_t *trig; - orte_std_cntr_t i, num_tokens; - int rc; - - OPAL_TRACE(1); - - /* check for error - this function can only be used to define triggers - * that compare their values to each other. It cannot be used to define - * triggers that fire when reaching a specified value as there is no - * way to specify a trigger level within this API - */ - if (ORTE_GPR_TRIG_AT_LEVEL & action) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* assemble the trigger object */ - trig = OBJ_NEW(orte_gpr_trigger_t); - if (NULL == trig) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (NULL != trig_name) { - trig->name = strdup(trig_name); - } - trig->action = action; - trig->cnt = 1; - trig->cbfunc = cbfunc; - trig->user_tag = user_tag; - - /* must count the number of tokens */ - num_tokens = 0; - if (NULL != tokens) { - for (i=0; NULL != tokens[i]; i++) { - num_tokens++; - } - } - - /* create the value object */ - trig->values = (orte_gpr_value_t**)malloc(sizeof(orte_gpr_value_t*)); - if (NULL == trig->values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&(trig->values[0]), addr_mode, segment, n, num_tokens))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(trig); - return rc; - } - - for (i=0; i < n; i++) { - if (ORTE_SUCCESS != 
(rc = orte_gpr_base_create_keyval(&(trig->values[0]->keyvals[i]), keys[i], ORTE_UNDEF, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(trig); - return rc; - } - } - - for (i=0; i < trig->values[0]->num_tokens; i++) { - trig->values[0]->tokens[i] = strdup(tokens[i]); - } - - /* send the subscription */ - if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(0, NULL, 1, &trig))) { - ORTE_ERROR_LOG(rc); - } - - /* return the subscription id */ - *id = trig->id; - - /* clean up memory */ - OBJ_RELEASE(trig); - - return rc; -} - -int orte_gpr_base_define_trigger_level(orte_gpr_trigger_id_t *id, - char *trig_name, - orte_gpr_trigger_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_std_cntr_t *levels, - orte_gpr_trigger_cb_fn_t cbfunc, - void *user_tag) -{ - orte_gpr_trigger_t *trig; - orte_std_cntr_t i, num_tokens; - int rc; - - OPAL_TRACE(1); - - /* check for error - this function can only be used to define triggers - * that fire at a specified level. 
It cannot be used to define - * triggers that compare their values to each other - */ - if (ORTE_GPR_TRIG_CMP_LEVELS & action || NULL == trig_name) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* assemble the trigger object */ - trig = OBJ_NEW(orte_gpr_trigger_t); - if (NULL == trig) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (NULL != trig_name) { - trig->name = strdup(trig_name); - } - trig->action = action; - trig->cnt = 1; - trig->cbfunc = cbfunc; - trig->user_tag = user_tag; - - /* must count the number of tokens */ - num_tokens = 0; - if (NULL != tokens) { - for (i=0; NULL != tokens[i]; i++) { - num_tokens++; - } - } - - /* create the value object */ - trig->values = (orte_gpr_value_t**)malloc(sizeof(orte_gpr_value_t*)); - if (NULL == trig->values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&(trig->values[0]), addr_mode, segment, n, num_tokens))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(trig); - return rc; - } - - for (i=0; i < n; i++) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_keyval(&(trig->values[0]->keyvals[i]), keys[i], ORTE_STD_CNTR, &(levels[i])))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(trig); - return rc; - } - } - - for (i=0; i < trig->values[0]->num_tokens; i++) { - trig->values[0]->tokens[i] = strdup(tokens[i]); - } - - /* send the subscription */ - if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(0, NULL, 1, &trig))) { - ORTE_ERROR_LOG(rc); - } - - /* return the subscription id */ - *id = trig->id; - - /* clean up memory */ - OBJ_RELEASE(trig); - - return rc; -} diff --git a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_arithmetic_ops.c b/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_arithmetic_ops.c deleted file mode 100644 index 7eca1139db..0000000000 --- a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_arithmetic_ops.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 
2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_pack_arith(orte_buffer_t *cmd, - orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_dss_arith_op_t operation, - orte_data_value_t *operand) -{ - orte_gpr_cmd_flag_t command; - orte_std_cntr_t n; - char **ptr; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_ARITH_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &addr_mode, 1, ORTE_GPR_ADDR_MODE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &segment, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* compute number of tokens */ - n = 0; - if (NULL != tokens) { - ptr = tokens; - while (NULL != ptr[n]) { - n++; - } - } - - /* pack number of tokens */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &n, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (n > 0) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, tokens, n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - 
} - } - - /* compute number of keys */ - n = 0; - if (NULL != keys) { - ptr = keys; - while (NULL != ptr[n]) { - n++; - } - } - - /* pack number of keys */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &n, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (n > 0) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, keys, n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /** pack the operation flag */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &operation, 1, ORTE_ARITH_OP))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /** pack the operand */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &operand, 1, ORTE_DATA_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_pack_increment_value(orte_buffer_t *cmd, orte_gpr_value_t *value) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_INCREMENT_VALUE_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &value, 1, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; - -} - -int orte_gpr_base_pack_decrement_value(orte_buffer_t *cmd, orte_gpr_value_t *value) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_DECREMENT_VALUE_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &value, 1, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_cleanup.c b/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_cleanup.c deleted file mode 100644 index 2f278af445..0000000000 --- a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_cleanup.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and 
Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_pack_cleanup_job(orte_buffer_t *buffer, orte_jobid_t jobid) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_CLEANUP_JOB_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &command, 1, ORTE_GPR_CMD))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &jobid, 1, ORTE_JOBID))) { - return rc; - } - - return ORTE_SUCCESS; -} - - -int orte_gpr_base_pack_cleanup_proc(orte_buffer_t *buffer, orte_process_name_t *proc) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_CLEANUP_PROC_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &command, 1, ORTE_GPR_CMD))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, proc, 1, ORTE_NAME))) { - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_del_index.c b/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_del_index.c deleted file mode 100644 index bc91604ccf..0000000000 --- a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_del_index.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_pack_delete_segment(orte_buffer_t *cmd, char *segment) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_DELETE_SEGMENT_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &segment, 1, ORTE_STRING))) { - return rc; - } - - return ORTE_SUCCESS; -} - - -int orte_gpr_base_pack_delete_entries(orte_buffer_t *cmd, - orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys) -{ - orte_gpr_cmd_flag_t command; - char **ptr; - orte_std_cntr_t n; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_DELETE_ENTRIES_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &mode, 1, ORTE_GPR_ADDR_MODE))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &segment, 1, ORTE_STRING))) { - return rc; - } - - /* compute number of tokens */ - if (NULL == tokens) { - n = 0; - } else { - ptr = tokens; - n = 0; - while (NULL != ptr[n]) { - n++; - } - } - - /* pack number of tokens */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &n, 1, ORTE_STD_CNTR))) { - return rc; - } - - /* pack tokens ONLY if n > 0 */ - if (0 < n) { - if (ORTE_SUCCESS != (rc 
= orte_dss.pack(cmd, tokens, n, ORTE_STRING))) { - return rc; - } - } - /* compute number of keys */ - if (NULL == keys) { - n = 0; - } else { - ptr = keys; - n = 0; - while (NULL != ptr[n]) { - n++; - } - } - - /* pack number of keys */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &n, 1, ORTE_STD_CNTR))) { - return rc; - } - - /* pack keys ONLY if n > 0 */ - if (0 < n) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, keys, n, ORTE_STRING))) { - return rc; - } - } - return ORTE_SUCCESS; -} - - -int orte_gpr_base_pack_index(orte_buffer_t *cmd, char *segment) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_INDEX_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* it's okay to pack a NULL string, so pack the segment regardless */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &segment, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_dump.c b/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_dump.c deleted file mode 100644 index a1b52bb68f..0000000000 --- a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_dump.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_pack_dump_all(orte_buffer_t *cmd) -{ - orte_gpr_cmd_flag_t command; - - OPAL_TRACE(3); - - command = ORTE_GPR_DUMP_ALL_CMD; - - return orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD); -} - -int orte_gpr_base_pack_dump_segments(orte_buffer_t *cmd, char *segment) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_DUMP_SEGMENTS_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &segment, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd, orte_gpr_trigger_id_t start) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_DUMP_TRIGGERS_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &start, 1, ORTE_GPR_TRIGGER_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; -} - -int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd, orte_gpr_subscription_id_t start) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &start, 1, ORTE_GPR_SUBSCRIPTION_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; -} - -int orte_gpr_base_pack_dump_a_trigger(orte_buffer_t *cmd, char *name, orte_gpr_trigger_id_t id) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - 
command = ORTE_GPR_DUMP_A_TRIGGER_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &name, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &id, 1, ORTE_GPR_TRIGGER_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; -} - -int orte_gpr_base_pack_dump_a_subscription(orte_buffer_t *cmd, char *name, - orte_gpr_subscription_id_t id) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &name, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &id, 1, ORTE_GPR_SUBSCRIPTION_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_pack_dump_callbacks(orte_buffer_t *cmd) -{ - orte_gpr_cmd_flag_t command; - - OPAL_TRACE(3); - - command = ORTE_GPR_DUMP_CALLBACKS_CMD; - - return orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD); -} - -int orte_gpr_base_pack_dump_segment_size(orte_buffer_t *cmd, char *segment) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_DUMP_SEGMENT_SIZE_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &segment, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_put_get.c b/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_put_get.c deleted file mode 100644 index 8956b83403..0000000000 --- a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_put_get.c +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (c) 
2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_pack_put(orte_buffer_t *cmd, - orte_std_cntr_t cnt, orte_gpr_value_t **values) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_PUT_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the number of values */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &cnt, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the values, if any */ - if (0 < cnt) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, values, cnt, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - return ORTE_SUCCESS; -} - - -int orte_gpr_base_pack_get(orte_buffer_t *cmd, - orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys) -{ - orte_gpr_cmd_flag_t command; - char **ptr; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - command = ORTE_GPR_GET_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = 
orte_dss.pack(cmd, &mode, 1, ORTE_GPR_ADDR_MODE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &segment, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* compute number of tokens */ - n = 0; - if (NULL != tokens) { - ptr = tokens; - while (NULL != ptr[n]) { - n++; - } - } - - /* pack number of tokens */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &n, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (n > 0) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, tokens, n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* compute number of keys */ - n = 0; - if (NULL != keys) { - ptr = keys; - while (NULL != ptr[n]) { - n++; - } - } - - /* pack number of keys */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &n, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (n > 0) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, keys, n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_pack_get_conditional(orte_buffer_t *cmd, - orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t num_conditions, orte_gpr_keyval_t **conditions) -{ - orte_gpr_cmd_flag_t command; - char **ptr; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - command = ORTE_GPR_GET_CONDITIONAL_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &mode, 1, ORTE_GPR_ADDR_MODE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &segment, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* compute number of tokens */ - n = 0; - if (NULL != tokens) { - ptr = tokens; - while (NULL != ptr[n]) { - n++; - } - } - - /* pack number of tokens */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &n, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - 
return rc; - } - - if (n > 0) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, tokens, n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* compute number of keys */ - n = 0; - if (NULL != keys) { - ptr = keys; - while (NULL != ptr[n]) { - n++; - } - } - - /* pack number of keys */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &n, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (n > 0) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, keys, n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* pack number of conditions */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &num_conditions, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack conditions */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, conditions, num_conditions, ORTE_GPR_KEYVAL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_subscribe.c b/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_subscribe.c deleted file mode 100644 index 0f286022f2..0000000000 --- a/orte/mca/gpr/base/pack_api_cmd/gpr_base_pack_subscribe.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_pack_subscribe(orte_buffer_t *cmd, - orte_std_cntr_t num_subs, - orte_gpr_subscription_t **subscriptions, - orte_std_cntr_t num_trigs, - orte_gpr_trigger_t **trigs) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_SUBSCRIBE_CMD; - - /* can't be both NULL */ - if (NULL == subscriptions && NULL == trigs) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the nummber of subscriptions - if there are any, pack them */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &num_subs, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (0 < num_subs) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, subscriptions, num_subs, ORTE_GPR_SUBSCRIPTION))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* pack the nummber of triggers - if there are any, pack them */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &num_trigs, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (0 < num_trigs) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, trigs, num_trigs, ORTE_GPR_TRIGGER))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - return ORTE_SUCCESS; -} - - -int orte_gpr_base_pack_unsubscribe(orte_buffer_t *cmd, - orte_gpr_subscription_id_t id) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_UNSUBSCRIBE_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &id, 1, ORTE_GPR_SUBSCRIPTION_ID))) { - return rc; - } - - return ORTE_SUCCESS; -} - -int 
orte_gpr_base_pack_cancel_trigger(orte_buffer_t *cmd, orte_gpr_trigger_id_t id) -{ - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_CANCEL_TRIGGER_CMD; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_GPR_CMD))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &id, 1, ORTE_GPR_TRIGGER_ID))) { - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/unpack_api_response/gpr_base_dump_notify.c b/orte/mca/gpr/base/unpack_api_response/gpr_base_dump_notify.c deleted file mode 100644 index 55058903fd..0000000000 --- a/orte/mca/gpr/base/unpack_api_response/gpr_base_dump_notify.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "opal/util/output.h" - -#include "orte/mca/gpr/base/base.h" - -static void orte_gpr_base_dump_load_string(orte_buffer_t *buffer, char **tmp); - -int orte_gpr_base_dump_notify_msg(orte_buffer_t *buffer, - orte_gpr_notify_message_t *msg) -{ - char *tmp_out; - int rc; - - OPAL_TRACE(3); - - asprintf(&tmp_out, "\nDUMP OF NOTIFY MESSAGE STRUCTURE"); - orte_gpr_base_dump_load_string(buffer, &tmp_out); - - if (NULL == msg) { - asprintf(&tmp_out, "\tNULL msg pointer"); - orte_gpr_base_dump_load_string(buffer, &tmp_out); - return ORTE_SUCCESS; - } - - if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp_out, "\t", msg, ORTE_GPR_NOTIFY_MSG))) { - ORTE_ERROR_LOG(rc); - return rc; - } - orte_gpr_base_dump_load_string(buffer, &tmp_out); - - - return ORTE_SUCCESS; -} - -int orte_gpr_base_dump_notify_data(orte_buffer_t *buffer, - orte_gpr_notify_data_t *data) -{ - char *tmp_out; - int rc; - - OPAL_TRACE(3); - - asprintf(&tmp_out, "\nDUMP OF NOTIFY DATA STRUCTURE"); - orte_gpr_base_dump_load_string(buffer, &tmp_out); - - if (NULL == data) { - asprintf(&tmp_out, "\tNULL data pointer"); - orte_gpr_base_dump_load_string(buffer, &tmp_out); - return ORTE_SUCCESS; - } - - if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp_out, "\t", data, ORTE_GPR_NOTIFY_DATA))) { - ORTE_ERROR_LOG(rc); - return rc; - } - orte_gpr_base_dump_load_string(buffer, &tmp_out); - - return ORTE_SUCCESS; -} - -int orte_gpr_base_dump_value(orte_buffer_t *buffer, orte_gpr_value_t *value) -{ - char *tmp_out; - int rc; - - OPAL_TRACE(3); - - asprintf(&tmp_out, "\nDUMP OF GPR VALUE STRUCTURE"); - orte_gpr_base_dump_load_string(buffer, &tmp_out); - - if (NULL == value) { - asprintf(&tmp_out, "\tNULL pointer"); - orte_gpr_base_dump_load_string(buffer, &tmp_out); - return ORTE_SUCCESS; - } - - if 
(ORTE_SUCCESS != (rc = orte_dss.print(&tmp_out, "", value, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - orte_gpr_base_dump_load_string(buffer, &tmp_out); - - return ORTE_SUCCESS; -} - -int orte_gpr_base_dump_keyval_value(orte_buffer_t *buffer, orte_gpr_keyval_t *iptr) -{ - char *tmp_out; - int rc; - - asprintf(&tmp_out, "\nDUMP OF GPR KEYVAL STRUCTURE"); - orte_gpr_base_dump_load_string(buffer, &tmp_out); - - if (NULL == iptr) { - asprintf(&tmp_out, "\tNULL pointer"); - orte_gpr_base_dump_load_string(buffer, &tmp_out); - return ORTE_SUCCESS; - } - - if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp_out, "", iptr, ORTE_GPR_KEYVAL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - orte_gpr_base_dump_load_string(buffer, &tmp_out); - - return ORTE_SUCCESS; -} - - -static void orte_gpr_base_dump_load_string(orte_buffer_t *buffer, char **tmp) -{ - orte_dss.pack(buffer, tmp, 1, ORTE_STRING); - free(*tmp); - -} diff --git a/orte/mca/gpr/base/unpack_api_response/gpr_base_print_dump.c b/orte/mca/gpr/base/unpack_api_response/gpr_base_print_dump.c deleted file mode 100644 index 93ac5ae6e4..0000000000 --- a/orte/mca/gpr/base/unpack_api_response/gpr_base_print_dump.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "opal/util/output.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_print_dump(orte_buffer_t *buffer) -{ - char *line; - orte_std_cntr_t n; - - n = 1; - while (ORTE_SUCCESS == orte_dss.unpack(buffer, &line, &n, ORTE_STRING)) { - opal_output(orte_gpr_base_output, "%s", line); - free(line); - n=1; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_arithmetic_ops.c b/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_arithmetic_ops.c deleted file mode 100644 index 9d47d8c248..0000000000 --- a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_arithmetic_ops.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_unpack_arith(orte_buffer_t *buffer, int *ret) -{ - orte_gpr_cmd_flag_t command; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_ARITH_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_unpack_increment_value(orte_buffer_t *cmd, int *ret) -{ - orte_gpr_cmd_flag_t command; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_INCREMENT_VALUE_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_unpack_decrement_value(orte_buffer_t *cmd, int *ret) -{ - orte_gpr_cmd_flag_t command; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_DECREMENT_VALUE_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} diff --git 
a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_cleanup.c b/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_cleanup.c deleted file mode 100644 index 3b4416c431..0000000000 --- a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_cleanup.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_unpack_cleanup_job(orte_buffer_t *cmd, int *ret) -{ - orte_gpr_cmd_flag_t command; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_CLEANUP_JOB_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_unpack_cleanup_proc(orte_buffer_t *cmd, int *ret) -{ - orte_gpr_cmd_flag_t command; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, &command, 
&n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_CLEANUP_PROC_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_del_index.c b/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_del_index.c deleted file mode 100644 index a8e2d898a1..0000000000 --- a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_del_index.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - - -int orte_gpr_base_unpack_delete_segment(orte_buffer_t *buffer, int *ret) -{ - orte_gpr_cmd_flag_t command; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_DELETE_SEGMENT_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; -} - - -int orte_gpr_base_unpack_delete_entries(orte_buffer_t *buffer, int *ret) -{ - orte_gpr_cmd_flag_t command; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_DELETE_ENTRIES_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; -} - - -int orte_gpr_base_unpack_index(orte_buffer_t *buffer, int *ret, orte_std_cntr_t *cnt, char ***index) -{ - orte_gpr_cmd_flag_t command; - orte_std_cntr_t n; - int rc; - - OPAL_TRACE(3); - - *cnt = 0; - *index = NULL; - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_INDEX_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = 
orte_dss.unpack(buffer, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 < n) { - *index = (char **)malloc(n*sizeof(char*)); - if (NULL == *index) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, *index, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - *cnt = n; - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_put_get.c b/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_put_get.c deleted file mode 100644 index 38370eae00..0000000000 --- a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_put_get.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - base unpack functions. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - -int orte_gpr_base_unpack_put(orte_buffer_t *buffer, int *ret) -{ - orte_gpr_cmd_flag_t command; - int rc; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_PUT_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; - -} - - -int orte_gpr_base_unpack_get(orte_buffer_t *buffer, int *ret, orte_std_cntr_t *cnt, orte_gpr_value_t ***values) -{ - orte_gpr_cmd_flag_t command; - int rc; - orte_std_cntr_t n, num; - - OPAL_TRACE(3); - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_GET_CMD != command && ORTE_GPR_GET_CONDITIONAL_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - /* find out how many values came back */ - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &num, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* if there were some, then get them */ - if (0 < num) { - *values = (orte_gpr_value_t**)malloc(num*sizeof(orte_gpr_value_t*)); - if (NULL == *values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, *values, &num, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - free(*values); - return rc; - } - } - - /* unpack the response code */ - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, ret, &n, ORTE_INT))) { - 
ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != *ret) { - ORTE_ERROR_LOG(*ret); - return rc; - } - - *cnt = num; - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_subscribe.c b/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_subscribe.c deleted file mode 100644 index 45f3cbb4fa..0000000000 --- a/orte/mca/gpr/base/unpack_api_response/gpr_base_unpack_subscribe.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - unpack functions. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/base/base.h" - - -int orte_gpr_base_unpack_subscribe(orte_buffer_t *buffer, int *ret) -{ - orte_gpr_cmd_flag_t command; - orte_std_cntr_t n; - int rc; - - OPAL_TRACE(3); - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_SUBSCRIBE_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - - -int orte_gpr_base_unpack_unsubscribe(orte_buffer_t *buffer, int *ret) -{ - orte_gpr_cmd_flag_t command; - orte_std_cntr_t n; - int rc; - - OPAL_TRACE(3); - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_UNSUBSCRIBE_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_gpr_base_unpack_cancel_trigger(orte_buffer_t *buffer, int *ret) -{ - orte_gpr_cmd_flag_t command; - orte_std_cntr_t n; - int rc; - - OPAL_TRACE(3); - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_GPR_CANCEL_TRIGGER_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, ret, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - diff --git 
a/orte/mca/gpr/gpr.h b/orte/mca/gpr/gpr.h deleted file mode 100644 index 2780b83923..0000000000 --- a/orte/mca/gpr/gpr.h +++ /dev/null @@ -1,826 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file - * @page gpr_api - */ - -/** - * \brief General Purpose Registry (GPR) API - * - * The Open MPI General Purpose Registry (GPR) - */ - -#ifndef ORTE_GPR_H_ -#define ORTE_GPR_H_ - -/* - * includes - */ - -#include "orte_config.h" - -#include - -#include "orte/orte_constants.h" -#include "opal/class/opal_list.h" - -#include "opal/mca/mca.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/rml/rml_types.h" - -#include "orte/dss/dss_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/rml/rml_types.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Component functions that MUST be provided - */ - -/* - * Perform any one-time initialization required by the module - * after RML/NS are available. - */ -typedef int (*orte_gpr_base_module_init_fn_t)(void); - -/* - * Begin recording a compound command. - * Normally, the registry executes each command as it is called. This, however, can result - * in an undesirable amount of network traffic. 
To reduce the traffic, this command allows - * the user to aggregate a set of registry commands - in any combination of put, get, index, - * or any other command - to be executed via a single communication to the registry. - * - * While recording, all registry commands are stored in a buffer instead of being immediately - * executed. Thus, commands that retrieve information (e.g., "get") will return a NULL - * during recording. Values from these commands will be returned when the compound - * command is actually executed. - * - * The process of recording a compound command is thread safe. Threads attempting to - * record commands are held on a lock until given access in their turn. - * - * @param None - * @retval ORTE_SUCCESS Compound command recorder is active. - * @retval ORTE_ERROR Compound command recorder did not activate. - * - * @code - * ompi_gpr.begin_compound_cmd(); - * @endcode - * - */ -typedef int (*orte_gpr_base_module_begin_compound_cmd_fn_t)(orte_buffer_t *buffer); - -/* - * Stop recording a compound command - * Terminates the recording process and clears the buffer of any previous commands - * - * @param None - * @retval ORTE_SUCCESS Recording stopped and buffer successfully cleared - * @retval ORTE_ERROR Didn't work - no idea why it wouldn't - * - * @code - * orte_gpr.stop_compound_cmd(); - * @endcode - * - */ -typedef int (*orte_gpr_base_module_stop_compound_cmd_fn_t)(void); - -/* - * Execute the compound command (BLOCKING) - * Execute the compound command that has been recorded. The function returns a status - * code that indicates whether or not all the included commands were successfully - * executed. Failure of any command contained in the compound command will terminate - * execution of the compound command list and return an error to the caller. - * - * @param none - * @retval ORTE_SUCCESS All commands in the list were successfully executed. - * @retval ORTE_ERROR(s) A command in the list failed, returning the indicated - * error code. 
- * - * @code - * status_code = orte_gpr.exec_compound_cmd(); - * @endcode - * - */ -typedef int (*orte_gpr_base_module_exec_compound_cmd_fn_t)(orte_buffer_t *buffer); - -/* - * Process a compound command buffer for a third-party (BLOCKING) - */ -typedef int (*orte_gpr_base_module_process_compound_cmd_fn_t)(orte_buffer_t *buffer, - orte_process_name_t *name); - - -/* - * Cleanup a job from the registry - * Remove all references to a given job from the registry. This includes removing - * all segments "owned" by the job, and removing all process names from dictionaries - * in the registry. - * - * @param jobid The jobid to be cleaned up. - * - * @retval ORTE_SUCCESS Operation was successfully completed. - * @retval ORTE_ERROR(s) Operation failed, returning the provided error code. - * - * @code - * status_code = orte_gpr.cleanup_job(jobid); - * @endcode - * - */ -typedef int (*orte_gpr_base_module_cleanup_job_fn_t)(orte_jobid_t jobid); - -/* - * Cleanup a process from the registry - * Remove all references to a given process from the registry. This includes removing - * the process name from all dictionaries in the registry, all subscriptions, etc. - * It also includes reducing any synchros on the job segment. - * - * @param proc A pointer to the process name to be cleaned up. - * - * @retval ORTE_SUCCESS Operation was successfully completed. - * @retval ORTE_ERROR(s) Operation failed, returning the provided error code. - * - * @code - * status_code = orte_gpr.cleanup_process(&proc); - * @endcode - * - */ -typedef int (*orte_gpr_base_module_cleanup_proc_fn_t)(orte_process_name_t *proc); - -/* - * Define and initialize a segment - * The registry contains segments which store containers of data. - * Although the registry can create segments "on-the-fly", it is often - * more efficient to initialize the segment via a separate command - thus - * allowing the registry to allocate the base storage for all the - * containers in a single malloc. 
- * - * Note that if the given segment already exists, this function simply - * makes sure it has enough space to store at least the passed number - * of containers - * - * @param name A character string indicating the name of the segment. - * @param num_slots The number of containers expected in this segment. This - * is just the starting number requested by the user - the registry will - * dynamically expand the segment as required. - * - * @retval ORTE_SUCCESS The operation was successfully executed. - * @retval ORTE_ERROR(s) An appropriate error code is returned. - * - * @code - * status_code = orte_gpr.preallocate_segment("MY_SEGMENT", num_slots); - * @endcode - */ -typedef int (*orte_gpr_base_module_preallocate_segment_fn_t)(char *name, orte_std_cntr_t num_slots); - -/* - * Get the number of entries on a segment or in a container - * Returns the number of containers on a segment (if NULL tokens provided) or in a container - * (if tokens provided - NULL terminated list) - */ -typedef int (*orte_gpr_base_module_get_number_entries_fn_t)(orte_std_cntr_t *n, char *segment, char **tokens); - -/* - * Delete a segment from the registry (BLOCKING) - * This command removes an entire segment from the registry, including all data objects, - * associated subscriptions, and synchros. This is a non-reversible process, so it should - * be used with care. - * - * @param segment Character string specifying the name of the segment to be removed. - * - * @retval ORTE_SUCCESS Segment successfully removed. - * @retval ORTE_ERROR(s) Segment could not be removed for some reason - most - * likely, the segment name provided was not found in the registry. - * - * @code - * status_code = orte_gpr.delete_segment(segment); - * @endcode - */ -typedef int (*orte_gpr_base_module_delete_segment_fn_t)(char *segment); - -/* - * Delete a segment from the registry (NON-BLOCKING) - * A non-blocking version of delete segment. 
- */ -typedef int (*orte_gpr_base_module_delete_segment_nb_fn_t)(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - - -/* - * Put a data object on the registry (BLOCKING) - * Place a data item on the registry using a blocking operation - i.e., the calling - * program will be blocked until the operation completes. - * - * Each value contains the addressing mode to be used. Addresses are defined by the tokens provided - * that describe the object being stored. The caller has the option of specifying how - * those tokens are to be combined in describing the object. Passing a value of - * "ORTE_REGISTRY_AND", for example, indicates that all provided tokens are to be used. - * In contrast, a value of "ORTE_REGISTRY_OR" indicates that any of the provided tokens - * can adequately describe the object. For the "put" command, only "ORTE_REGISTRY_XAND" - * is accepted - in other words, the tokens must exactly match those of any existing - * object in order for the object to be updated. In addition, the "ORTE_REGISTRY_OVERWRITE" - * flag must be or'd into the mode to enable update of the data object. If a data object - * is found with the identical token description, but ORTE_REGISTRY_OVERWRITE is NOT specified, - * then an error will be generated - the data object will NOT be overwritten in this - * situation. - * - * Upon completing the "put", all subscriptions registered on the - * specified segment are checked and appropriately processed. - * - * @param cnt The number of value structures to be stored. - * - * @param **values A pointer to the start of a contiguous array of one or more - * pointers to orte_gpr_value_t - * objects to be stored. The registry will copy this data onto the specified segment - the - * calling program is responsible for freeing any memory, if appropriate. - * - * @retval ORTE_SUCCESS The data has been stored on the specified segment, or the - * corresponding existing data has been updated. 
- * - * @retval ORTE_ERROR(s) The data was not stored on the specified segment, or the - * corresponding existing data was not found, or the data was found but the overwrite - * flag was not set. - * - * @code - * orte_gpr_value_t *value; - * - * status_code = orte_gpr.put(1, &value); - * @endcode - */ -typedef int (*orte_gpr_base_module_put_fn_t)(orte_std_cntr_t cnt, orte_gpr_value_t **values); - -/* simplified version of the put command */ -typedef int (*orte_gpr_base_module_put_1_fn_t)(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - char *key, orte_data_value_t *value); - -typedef int (*orte_gpr_base_module_put_N_fn_t)(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - orte_std_cntr_t n, char **keys, - orte_data_value_t **data_values); - - -/* - * Put data on the registry (NON-BLOCKING) - * A non-blocking version of put. - */ -typedef int (*orte_gpr_base_module_put_nb_fn_t)(orte_std_cntr_t cnt, orte_gpr_value_t **values, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - - -/* - * Get data from the registry (BLOCKING) - * Returns data from the registry. Given an addressing mode, segment name, and a set - * of tokens describing the data to be retrieved, the "get" function will search the specified - * registry segment and return all data items that "match" the description. Addressing - * modes specify how the provided tokens are to be combined to determine the match - - * a value of "ORTE_REGISTRY_AND", for example, indictates that all the tokens must be - * included in the object's description, but allows for other tokens to also be present. - * A value of "ORTE_REGISTRY_XAND", in contrast, requires that all the tokens be present, - * and that ONLY those tokens be present. - * - * The data is returned as a list of orte_gpr_value_t objects. The caller is - * responsible for freeing this data storage. 
Only copies of the registry data are - * returned - thus, any actions taken by the caller will NOT impact data stored on the - * registry. - * - * @param addr_mode (IN) The addressing mode to be used in the search. - * @param *segment (IN) A character string indicating the name of the segment to be searched. - * @param **tokens (IN) A NULL-terminated **char list of tokens describing the objects to be - * returned. A value of NULL indicates that ALL data on the segment is to be returned. - * @param **keys (IN) A NULL-terminated **char array of keys describing the specific - * key-value data to be returned. A value of NULL indicates that ALL key-value pairs - * described by the segment/token combination are to be returned. - * - * @param *cnt (OUT) A pointer to the number of objects returned by the request. - * @param ***values (OUT) A pointer to an array of orte_gpr_value_t object pointers - * containing the data - * returned by the specified search, including the segment and container id info - * for each keyval pair. - * - * @retval ORTE_SUCCESS Operation was successfully completed. - * @retval ORTE_ERROR(s) Operation failed, returning the provided error code. - * - * @code - * opal_list_t *keyval_list; - * orte_std_cntr_t cnt; - * orte_gpr_value_t **values; - * - * status_code = orte_gpr.get(addr_mode, segment, tokens, keyval_list, - * &cnt, &values); - * @endcode - */ -typedef int (*orte_gpr_base_module_get_fn_t)(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values); - -typedef int (*orte_gpr_base_module_get_conditional_fn_t)(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t num_conditions, orte_gpr_keyval_t **conditions, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values); - -/* - * Get data from the registry (NON-BLOCKING) - * A non-blocking version of get. Data is returned to the callback function in the - * notify message format. 
- */ -typedef int (*orte_gpr_base_module_get_nb_fn_t)(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - - -/* - * Delete an object from the registry (BLOCKING) - * Remove an object from the registry. Given an addressing mode, segment name, and a set - * of tokens describing the data object, the function will search the specified - * registry segment and delete all data items that "match" the description. Addressing - * modes specify how the provided tokens are to be combined to determine the match - - * a value of "ORTE_REGISTRY_AND", for example, indictates that all the tokens must be - * included in the object's description, but allows for other tokens to also be present. - * A value of "ORTE_REGISTRY_XAND", in contrast, requires that all the tokens be present, - * and that ONLY those tokens be present. - * - * Note: A value of NULL for the tokens will delete ALL data items from the specified - * segment. - * - * @param addr_mode The addressing mode to be used in the search. - * @param *segment A character string indicating the name of the segment to be searched. - * @param **tokens A NULL-terminated **char list of tokens describing the objects to be - * returned. A value of NULL indicates that ALL data on the segment is to be removed. - * - * @retval ORTE_SUCCESS Operation was successfully completed. - * @retval ORTE_ERROR(s) Operation failed, returning the provided error code. - * - * @code - * status_code = orte_gpr.delete_object(mode, segment, tokens); - * @endcode - */ -typedef int (*orte_gpr_base_module_delete_entries_fn_t)(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys); - -/* - * Delete an object from the registry (NON-BLOCKING) - * A non-blocking version of delete object. Result of the command is returned - * to the callback function in the notify msg format. 
- */ -typedef int (*orte_gpr_base_module_delete_entries_nb_fn_t)( - orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); -/* - * Obtain an index of a specified dictionary (BLOCKING) - * The registry contains a dictionary at the global level (containing names of all the - * segments) and a dictionary for each segment (containing the names of all tokens used - * in that segment). This command allows the caller to obtain a list of all entries - * in the specified dictionary. - * - * @param *segment (IN) A character string indicating the segment whose dictionary is to be - * indexed. A value of NULL indicates that the global level dictionary is to be used. - * - * @param *cnt (IN) A pointer to a orte_std_cntr_t location for storing the number of - * tokens in the index. - * @param ***index (IN) The address to place a char** array of strings containing an - * index of the specified dictionary. - * - * @retval ORTE_SUCCESS Operation was successfully completed. - * @retval ORTE_ERROR(s) Operation failed, returning the provided error code. - * - * @code - * int32_t cnt; - * char *index; - * char *segment; - * - * status_code = orte_gpr.index(segment, &cnt, &index); - * @endcode - */ -typedef int (*orte_gpr_base_module_index_fn_t)(char *segment, orte_std_cntr_t *cnt, char ***index); - -/* - * Obtain an index of a specified dictionary (NON-BLOCKING) - * A non-blocking version of index. Result of the command is returned to the - * callback function in the notify msg format. - */ -typedef int (*orte_gpr_base_module_index_nb_fn_t)(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - -/* - * Subscribe to be notified upon a specified action - * The registry includes a publish/subscribe mechanism by which callers can be notified - * upon certain actions occuring to data objects stored on the registry. This function - * allows the caller to register for such notifications. 
The registry allows a subscription - * to be placed upon any segment, and upon the entire registry if desired. - * - * Two types of subscriptions are supported: - * (a) notifications - these occur whenever the specified action occurs on the - * identified data entries in the registry. - * - * (b) triggers - these occur whenever a count of the number of identified data - * entries reaches the specified level. The caller can specify that the trigger - * maintain its own count - in this case, the trigger will count the number of data - * entries in the registry that meet the specifications provided in \em value, and store - * the running count in the location specified by \em trig_value. Alternatively, the - * caller can specify that the trigger only monitor a count that is being maintained - * by someone else - in this case, the \em trig_value information is used to identify - * one or more "counters" that are to be monitored, with the trigger fired when either all - * identified counters reach the respective levels provided in \em trig_value (using - * the AT mode) or when the levels reach the same value (the CMP mode). - * - * Note that all addressing mode rules apply to both \em value and \em trig_value, - * including wildcards. - * - * @param actions (IN) The actions which are to generate a notification message and/or define - * the trigger operation. These can - * be OR'd together from the defined registry action flags. - * - * @param num_subs (IN) The numbr of subscription objects being provided - * - * @param **subscriptions (IN) A pointer to an array of subscription objects that - * contain descriptions of the data that is to be returned when a subscription fires. - * For subscribe requests that do NOT include a trigger, this is the data that - * will be monitored per the specified action. All of - * the described values will be returned in a notification message when the specified - * action occurs. 
- * - * @param num_trigs (IN) The number of trigger objects being provided - * - * @param **triggers (IN) A pointer to an array of orte_gpr_trigger_t objects that describe the - * conditions (as described above) which will generate a trigger message to be sent - * to the callback function. Trigger messages include all data specified in the - * subscription objects, but do NOT include the trigger counters themselves unless - * so specified with the ORTE_GPR_TRIG_INCLUDE_DATA command. - * - * @param *sub_number (OUT) The notify id for the resulting subscription - * is returned in the provided memory location. Callers should save this - * number for later use if (for example) it is desired to remove the - * subscription from the registry - * - * @retval ORTE_SUCCESS Operation was successfully completed. - * @retval ORTE_ERROR(s) Operation failed, returning the provided error code. - * - * @code - * orte_gpr_subscription_t *subscription; - * orte_gpr_notify_id_t sub_number; - * orte_gpr_value_t trig_value; - * - * status_code = orte_gpr.subscribe(action, 1, &subscription, &trig_value, - * &sub_number); - * @endcode - */ -typedef int (*orte_gpr_base_module_subscribe_fn_t)( - orte_std_cntr_t num_subs, - orte_gpr_subscription_t **subscriptions, - orte_std_cntr_t num_trigs, - orte_gpr_trigger_t **triggers); - -/* simplified subscription functions */ -typedef int (*orte_gpr_base_module_subscribe_1_fn_t)(orte_gpr_subscription_id_t *id, - char *trig_name, - char *sub_name, - orte_gpr_notify_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - char *key, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag); - -typedef int (*orte_gpr_base_module_subscribe_N_fn_t)(orte_gpr_subscription_id_t *id, - char *trig_name, - char *sub_name, - orte_gpr_notify_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag); - -typedef int 
(*orte_gpr_base_module_define_trigger_fn_t)(orte_gpr_trigger_id_t *id, - char *trig_name, - orte_gpr_trigger_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_gpr_trigger_cb_fn_t cbfunc, - void *user_tag); - -typedef int (*orte_gpr_base_module_define_trigger_level_fn_t)(orte_gpr_trigger_id_t *id, - char *trig_name, - orte_gpr_trigger_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_std_cntr_t *levels, - orte_gpr_trigger_cb_fn_t cbfunc, - void *user_tag); - -/* - * Cancel a subscription. - * Once a subscription has been entered on the registry, a caller may choose to permanently - * remove it at a later time. This function supports that request. - * - * @param sub_number The orte_gpr_subscription_id_t value returned by the original subscribe - * command. - * - * @retval ORTE_SUCCESS The subscription was removed. - * @retval ORTE_ERROR The subscription could not be removed - most likely caused by specifying - * a non-existent (or previously removed) subscription number. - * - * @code - * status_code = orte_gpr.unsubscribe(sub_number); - * @endcode - */ -typedef int (*orte_gpr_base_module_unsubscribe_fn_t)(orte_gpr_subscription_id_t sub_number); - -/* - * Cancel a trigger. - * Once a trigger has been entered on the registry, a caller may choose to permanently - * remove it at a later time. This function supports that request. - * - * @param trig_number The orte_gpr_trigger_id_t value returned by the original subscribe - * command. - * - * @retval ORTE_SUCCESS The trigger was removed. - * @retval ORTE_ERROR The trigger could not be removed - most likely caused by specifying - * a non-existent (or previously removed) trigger number. 
- * - * @code - * status_code = orte_gpr.cancel_trigger(trig_number); - * @endcode - */ -typedef int (*orte_gpr_base_module_cancel_trigger_fn_t)(orte_gpr_trigger_id_t trig_number); - - -/* Output the registry's contents to an output stream - * For debugging purposes, it is helpful to be able to obtain a complete formatted printout - * of the registry's contents. This function provides that ability. - * - * @param output_id The output stream id to which the registry's contents are to be - * printed. - * - * @retval ORTE_SUCCESS Operation was successfully completed. - * @retval ORTE_ERROR(s) Operation failed, returning the provided error code. - * - * @code - * orte_gpr.dump(output_id); - * @endcode - */ -typedef int (*orte_gpr_base_module_dump_all_fn_t)(void); - -typedef int (*orte_gpr_base_module_dump_segment_fn_t)(char *segment); - -typedef int (*orte_gpr_base_module_dump_triggers_fn_t)( - orte_gpr_trigger_id_t tail); - -typedef int (*orte_gpr_base_module_dump_subscriptions_fn_t)( - orte_gpr_subscription_id_t tail); - -typedef int (*orte_gpr_base_module_dump_a_trigger_fn_t)( - char *name, - orte_gpr_trigger_id_t id); - -typedef int (*orte_gpr_base_module_dump_a_subscription_fn_t)( - char *name, - orte_gpr_subscription_id_t id); - -typedef int (*orte_gpr_base_module_dump_local_triggers_fn_t)(void); - -typedef int (*orte_gpr_base_module_dump_local_subscriptions_fn_t)(void); - -typedef int (*orte_gpr_base_module_dump_callbacks_fn_t) (void); - -typedef int (*orte_gpr_base_module_dump_notify_msg_fn_t)(orte_gpr_notify_message_t *msg); - -typedef int (*orte_gpr_base_module_dump_notify_data_fn_t)(orte_gpr_notify_data_t *data); - -typedef int (*orte_gpr_base_module_dump_value_fn_t)(orte_gpr_value_t *value); - -typedef int (*orte_gpr_base_module_dump_segment_size_fn_t)(char *segment); - -/* - * Increment value - * This function increments the stored value of an existing registry entry by one. Failure - * to find the entry on the registry will result in an error. 
- */ -typedef int (*orte_gpr_base_module_increment_value_fn_t)(orte_gpr_value_t *value); - -/* - * Decrement value - * This function decrements the stored value of an existing registry entry by one. Failure - * to find the entry on the registry will result in an error. - */ -typedef int (*orte_gpr_base_module_decrement_value_fn_t)(orte_gpr_value_t *value); - -/* - * General arithmetic operation - * This function performs the specified arithmetic operation on registry entries - * defined by the search criteria. Note that the operation flags are those defined in - * orte/dss/dss_types.h (ORTE_DSS_ADD, ORTE_DSS_SUB, etc.). - * - * @param operation orte_dss_arith_op_t value indicating the operation to be performed - * @param value Pointer to an orte_data_value_t object containing the value to be used - * in the operation. This value will be added to or subtracted from the values found by - * the search criteria, or will multiply or divide those values, according to what was - * specified. The point here is that this value is always the right most in the specified - * operation (e.g., for division, it is (found entry) / value). - * - * The results of the operation are stored back in their original location, thus overwriting - * the original values. If no pre-existing entry is found, the entry will be created - * with the specified value. - */ -typedef int (*orte_gpr_base_module_arith_fn_t)(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_dss_arith_op_t operation, - orte_data_value_t *value); - - -/* Deliver a notify message - * To support the broadcast of stage gate messages that supply all subscribed - * data in a single message, we have to provide an API that allows the xcast - * to "inject" the message back into the registry's local delivery system. - * - * @param msg A pointer to the orte_gpr_notify_message_t object to be delivered. - * Note that the calling program is responsible for releasing this object. 
- * - * @retval None - */ -typedef int (*orte_gpr_base_module_deliver_notify_msg_t)(orte_gpr_notify_message_t *msg); - - -/* Create a gpr value structure - * To make it easier for users, this function will create an orte_gpr_value_t structure, - * including performing all the error checks to ensure adequate memory is available. - * - * Any data that the caller wishes to provide will be pre-loaded into the returned value. - * The function will allocate space for the value object and for the number of keyvals - * and tokens to be stored in the object. If the caller wishes to allocate that space - * themselves, or does not want space allocated for those purposes, then just pass a - * value of "0" (zero) and the function will not allocate memory to those areas. Likewise, - * a value of NULL for segment will cause the function to ignore that field in the - * value object. - * - * @retval ORTE_SUCCESS Value structure successfully created - * @retval ORTE_XXXX Appropriate error code indicating problem encountered. - */ -typedef int (*orte_gpr_base_module_create_value_fn_t)(orte_gpr_value_t **value, - orte_gpr_addr_mode_t addr_mode, - char *segment, - orte_std_cntr_t cnt, /**< Number of keyval objects */ - orte_std_cntr_t num_tokens); -/* Create a keyval object - * To make it easier for users, this function will create an orte_gpr_keyval_t object, - * including performing all the error checks to ensure adequate memory is available. - * - * Any data that the caller provides will be copied into the returned keyval object. - * If key or data are set to NULL, then those fields will be left to their default NULL - * values. - * - * @retval ORTE_SUCCESS Value structure successfully created - * @retval ORTE_XXXX Appropriate error code indicating problem encountered. 
- */ -typedef int (*orte_gpr_base_module_create_keyval_fn_t)(orte_gpr_keyval_t **keyval, - char *key, - orte_data_type_t type, - void *data); - - typedef int (*orte_gpr_base_module_ft_event_fn_t)(int state); - - -/* - * Ver 1.0.0 - */ -struct orte_gpr_base_module_1_0_0_t { - /* INIT */ - orte_gpr_base_module_init_fn_t init; - /* BLOCKING OPERATIONS */ - orte_gpr_base_module_get_fn_t get; - orte_gpr_base_module_get_conditional_fn_t get_conditional; - orte_gpr_base_module_put_fn_t put; - orte_gpr_base_module_put_1_fn_t put_1; - orte_gpr_base_module_put_N_fn_t put_N; - orte_gpr_base_module_delete_entries_fn_t delete_entries; - orte_gpr_base_module_delete_segment_fn_t delete_segment; - orte_gpr_base_module_index_fn_t index; - /* NON-BLOCKING OPERATIONS */ - orte_gpr_base_module_get_nb_fn_t get_nb; - orte_gpr_base_module_put_nb_fn_t put_nb; - orte_gpr_base_module_delete_entries_nb_fn_t delete_entries_nb; - orte_gpr_base_module_delete_segment_nb_fn_t delete_segment_nb; - orte_gpr_base_module_index_nb_fn_t index_nb; - /* GENERAL OPERATIONS */ - orte_gpr_base_module_create_value_fn_t create_value; - orte_gpr_base_module_create_keyval_fn_t create_keyval; - orte_gpr_base_module_preallocate_segment_fn_t preallocate_segment; - orte_gpr_base_module_get_number_entries_fn_t get_number_entries; - orte_gpr_base_module_deliver_notify_msg_t deliver_notify_msg; - /* ARITHMETIC OPERATIONS */ - orte_gpr_base_module_arith_fn_t arith; - orte_gpr_base_module_increment_value_fn_t increment_value; - orte_gpr_base_module_decrement_value_fn_t decrement_value; - /* SUBSCRIBE OPERATIONS */ - orte_gpr_base_module_subscribe_fn_t subscribe; - orte_gpr_base_module_subscribe_1_fn_t subscribe_1; - orte_gpr_base_module_subscribe_N_fn_t subscribe_N; - orte_gpr_base_module_define_trigger_fn_t define_trigger; - orte_gpr_base_module_define_trigger_level_fn_t define_trigger_level; - orte_gpr_base_module_unsubscribe_fn_t unsubscribe; - orte_gpr_base_module_cancel_trigger_fn_t cancel_trigger; - /* COMPOUND 
COMMANDS */ - orte_gpr_base_module_begin_compound_cmd_fn_t begin_compound_cmd; - orte_gpr_base_module_stop_compound_cmd_fn_t stop_compound_cmd; - orte_gpr_base_module_exec_compound_cmd_fn_t exec_compound_cmd; - orte_gpr_base_module_process_compound_cmd_fn_t process_compound_cmd; - /* DIAGNOSTIC OPERATIONS */ - orte_gpr_base_module_dump_all_fn_t dump_all; - orte_gpr_base_module_dump_segment_fn_t dump_segment; - orte_gpr_base_module_dump_triggers_fn_t dump_triggers; - orte_gpr_base_module_dump_subscriptions_fn_t dump_subscriptions; - orte_gpr_base_module_dump_a_trigger_fn_t dump_a_trigger; - orte_gpr_base_module_dump_a_subscription_fn_t dump_a_subscription; - orte_gpr_base_module_dump_local_triggers_fn_t dump_local_triggers; - orte_gpr_base_module_dump_local_subscriptions_fn_t dump_local_subscriptions; - orte_gpr_base_module_dump_callbacks_fn_t dump_callbacks; - orte_gpr_base_module_dump_notify_msg_fn_t dump_notify_msg; - orte_gpr_base_module_dump_notify_data_fn_t dump_notify_data; - orte_gpr_base_module_dump_value_fn_t dump_value; - orte_gpr_base_module_dump_segment_size_fn_t dump_segment_size; - /* CLEANUP OPERATIONS */ - orte_gpr_base_module_cleanup_job_fn_t cleanup_job; - orte_gpr_base_module_cleanup_proc_fn_t cleanup_process; - - orte_gpr_base_module_ft_event_fn_t ft_event; -}; -typedef struct orte_gpr_base_module_1_0_0_t orte_gpr_base_module_1_0_0_t; -typedef orte_gpr_base_module_1_0_0_t orte_gpr_base_module_t; - -/* - * GPR Component - */ - -typedef orte_gpr_base_module_t* (*orte_gpr_base_component_init_fn_t)( - bool *allow_multi_user_threads, - bool *have_hidden_threads, - int *priority); - -typedef int (*orte_gpr_base_component_finalize_fn_t)(void); - -/* - * the standard component data structure - */ - - -struct mca_gpr_base_component_1_0_0_t { - mca_base_component_t gpr_version; - mca_base_component_data_1_0_0_t gpr_data; - - orte_gpr_base_component_init_fn_t gpr_init; - orte_gpr_base_component_finalize_fn_t gpr_finalize; -}; -typedef struct 
mca_gpr_base_component_1_0_0_t mca_gpr_base_component_1_0_0_t; -typedef mca_gpr_base_component_1_0_0_t mca_gpr_base_component_t; - -/* - * Macro for use in modules that are of type gpr v1.0.0 - */ -#define MCA_GPR_BASE_VERSION_1_0_0 \ - /* gpr v1.0 is chained to MCA v1.0 */ \ - MCA_BASE_VERSION_1_0_0, \ - /* gpr v1.0 */ \ - "gpr", 1, 0, 0 - -/* - * global module that holds function pointers - */ -ORTE_DECLSPEC extern orte_gpr_base_module_t orte_gpr; /* holds selected module's function pointers */ - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/gpr/gpr_types.h b/orte/mca/gpr/gpr_types.h deleted file mode 100644 index 0e8ce5fb71..0000000000 --- a/orte/mca/gpr/gpr_types.h +++ /dev/null @@ -1,259 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file - */ - -/** - * \brief General Purpose Registry (GPR) API - * - * The Open MPI General Purpose Registry (GPR) - * - * This file contains the public type definitions supporting the GPR - */ - -#ifndef ORTE_GPR_TYPES_H_ -#define ORTE_GPR_TYPES_H_ - -#include "orte_config.h" -#include "orte/orte_types.h" - -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -#include "orte/mca/schema/schema.h" -#include "opal/class/opal_object.h" -#include "orte/class/orte_pointer_array.h" - -#include "orte/dss/dss_types.h" -#include "orte/mca/ns/ns_types.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** Define the notify actions for the subscription system - can be OR'd - * to create multiple actions - */ -#define ORTE_GPR_NOTIFY_NONE (uint8_t)0x00 /**< No trigger action */ -#define ORTE_GPR_NOTIFY_VALUE_CHG_TO (uint8_t)0x01 /**< Notifies subscriber when value changes to specified value */ -#define ORTE_GPR_NOTIFY_VALUE_CHG_FRM (uint8_t)0x02 /**< Notifies subscriber when value changes away from specified value */ -#define ORTE_GPR_NOTIFY_VALUE_CHG (uint8_t)0x03 /**< Notifies subscriber when value changes */ -#define ORTE_GPR_NOTIFY_ADD_ENTRY (uint8_t)0x04 /**< Notifies subscriber when entry added */ -#define ORTE_GPR_NOTIFY_DEL_ENTRY (uint8_t)0x08 /**< Notifies subscriber when entry deleted */ -#define ORTE_GPR_NOTIFY_ALL (uint8_t)0x0f /**< Notifies subscriber upon any action */ -#define ORTE_GPR_NOTIFY_PRE_EXISTING (uint8_t)0x10 /**< Provide list of all pre-existing data */ -#define ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG (uint8_t)0x20 /**< Notifies are off when subscription entered - turned on when trigger fires */ -#define ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG (uint8_t)0x40 /**< Delete this subscription after associated trigger fires */ -#define ORTE_GPR_NOTIFY_ANY (uint8_t)0xff /**< Used to test if any action flags set */ - -typedef uint8_t orte_gpr_notify_action_t; -#define 
ORTE_GPR_NOTIFY_ACTION_T ORTE_UINT8 - -typedef int32_t orte_gpr_subscription_id_t; -#define ORTE_GPR_SUBSCRIPTION_ID_T ORTE_INT32 -#define ORTE_GPR_SUBSCRIPTION_ID_MAX INT32_MAX - - -#define ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS (uint8_t)0x01 /**< Include the trigger data in the notification msg */ -#define ORTE_GPR_TRIG_ONE_SHOT (uint8_t)0x02 /**< Only trigger once - then delete trigger */ -#define ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME (uint8_t)0x04 /**< send all associated data to trigger callback fn */ -#define ORTE_GPR_TRIG_AT_LEVEL (uint8_t)0x08 /**< Trigger whenever count reaches specified level */ -#define ORTE_GPR_TRIG_CMP_LEVELS (uint8_t)0x80 /**< Trigger when all the specified values are equal */ -#define ORTE_GPR_TRIG_ALL_AT (uint8_t)0x7b /**< Use all trig defs except include trig data with AT - a typical situation */ -#define ORTE_GPR_TRIG_ALL_CMP (uint8_t)0xf3 /**< Use all trig defs except include trig data with CMP */ -#define ORTE_GPR_TRIG_ANY (uint8_t)0xff /**< Used to test if any trigs are set */ - -typedef uint8_t orte_gpr_trigger_action_t; -#define ORTE_GPR_TRIGGER_ACTION_T ORTE_UINT8 - -typedef int32_t orte_gpr_trigger_id_t; -#define ORTE_GPR_TRIGGER_ID_T ORTE_INT32 -#define ORTE_GPR_TRIGGER_ID_MAX INT32_MAX - - -/** Define the addressing mode bit-masks for registry operations. 
- * - * Token modes - */ -#define ORTE_GPR_TOKENS_AND (uint16_t)0x0001 /**< AND tokens together for search results */ -#define ORTE_GPR_TOKENS_OR (uint16_t)0x0002 /**< OR tokens for search results */ -#define ORTE_GPR_TOKENS_XAND (uint16_t)0x0004 /**< All tokens required, nothing else allowed */ -#define ORTE_GPR_TOKENS_XOR (uint16_t)0x0008 /**< Any one of the tokens required, nothing else allowed */ -#define ORTE_GPR_TOKENS_NOT (uint16_t)0x0010 /**< Everything except those that meet specs */ -/* - * Key modes - */ -#define ORTE_GPR_KEYS_AND (uint16_t)0x0100 /**< AND keys together */ -#define ORTE_GPR_KEYS_OR (uint16_t)0x0200 /**< OR keys together */ -#define ORTE_GPR_KEYS_XAND (uint16_t)0x0400 /**< All keys required, nothing else allowed */ -#define ORTE_GPR_KEYS_XOR (uint16_t)0x0800 /**< Any one of the keys required, nothing else allowed */ -#define ORTE_GPR_KEYS_NOT (uint16_t)0x1000 /**< Everything except those that meet specs */ -/* - * General modes - */ -#define ORTE_GPR_STRIPPED (uint16_t)0x2000 /**< Return values should contain no descriptive info */ -#define ORTE_GPR_OVERWRITE (uint16_t)0x8000 /**< Allow overwrite of existing info */ -#define ORTE_GPR_NO_OVERWRITE (uint16_t)0x0000 /**< Do not allow overwrite of existing info */ -#define ORTE_GPR_NO_DUPLICATE (uint16_t)0x4000 /**< Do not duplicate an existing entry - just ignore the new one */ - -typedef uint16_t orte_gpr_addr_mode_t; -#define ORTE_GPR_ADDR_MODE_T ORTE_UINT16 -/* - * typedefs - */ - /* - * Key-value pairs for registry operations - */ -typedef struct { - opal_list_item_t super; /* required for this to be on a list */ - char *key; /* string key for this value */ - orte_data_value_t *value; /* value */ -} orte_gpr_keyval_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_keyval_t); - - -/** Return value structure for registry requests. - * A request for information stored within the registry returns an array of values that - * correspond to the provided tokens. 
Each object in the array contains an array of - * keyvals from a specific container. Note that the array - * contains \em copies of the data in the registry. This prevents inadvertent - * modification of the registry, but requires the recipient to release the data's - * memory when done. - * - * The address mode and segment fields are included here for convenience and so that - * the structure can be re-used by the put command. - */ -typedef struct { - opal_object_t super; /**< Makes this an object */ - orte_gpr_addr_mode_t addr_mode; /**< Address mode that was used for combining keys/tokens */ - char *segment; /**< Name of the segment this came from */ - orte_std_cntr_t cnt; /**< Number of keyval objects returned */ - orte_gpr_keyval_t **keyvals; /**< Contiguous array of keyval object pointers */ - orte_std_cntr_t num_tokens; /**< Number of tokens from the container that held these keyvals */ - char **tokens; /**< List of tokens that described the container */ -} orte_gpr_value_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_value_t); -#define ORTE_GPR_VALUE_EMPTY { OPAL_OBJ_STATIC_INIT(orte_gpr_value_t), 0, NULL, 0, NULL, 0, NULL} - -/** Return structure for notification messages - * A notification message contains data from each registered subscription structure. - * Each block of data is associated with a specified callback function and contains - * data from a single segment, one or more containers with one or more keyvals/container. 
- */ -typedef struct orte_gpr_notify_data_t { - opal_object_t super; /**< Makes this an object */ - char *target; /**< Name of the associated subscripton, if provided */ - orte_gpr_subscription_id_t id; /**< Number of the associated subscription */ - bool remove; /**< Remove this subscription from recipient's tracker */ - orte_std_cntr_t cnt; /**< Number of value objects returned, one per container */ - orte_pointer_array_t *values; /**< Array of value objects returned */ -} orte_gpr_notify_data_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_notify_data_t); - -/** Return message for notify requests - */ -typedef uint8_t orte_gpr_notify_msg_type_t; -#define ORTE_GPR_NOTIFY_MSG_TYPE_T ORTE_UINT8 -#define ORTE_GPR_TRIGGER_MSG (orte_gpr_notify_msg_type_t)0x01 -#define ORTE_GPR_SUBSCRIPTION_MSG (orte_gpr_notify_msg_type_t)0x02 - -typedef struct { - opal_object_t super; /**< Make this an object */ - orte_gpr_notify_msg_type_t msg_type; /**< trigger or subscription msg */ - char *target; /**< Name of the associated trigger, if provided */ - orte_gpr_trigger_id_t id; /**< trigger id, if message comes from trigger - (ORTE_GPR_TRIGGER_ID_MAX otherwise) */ - bool remove; /**< Remove this trigger from recipient's tracker */ - orte_std_cntr_t cnt; /**< number of data objects */ - orte_pointer_array_t *data; /**< Contiguous array of pointers to data objects */ -} orte_gpr_notify_message_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_notify_message_t); - -/** Notify callback function - * notify_msg = message containing data provided by trigger - * - * user_tag = whatever tag data the user provided when filing the subscription - */ -typedef void (*orte_gpr_notify_cb_fn_t)(orte_gpr_notify_data_t *notify_data, void *user_tag); - -/** Trigger callback function - * notify_msg = message containing multiple blocks of data provided by trigger - * - * user_tag = whatever tag data the user provided when filing the subscription - * - * Since this only takes place locally, we CAN get 
a status code from the callback! - */ -typedef int (*orte_gpr_trigger_cb_fn_t)(orte_gpr_notify_message_t *msg); - -/** Structure for registering subscriptions - * A request to be notified when certain events occur, or when counters reach specified - * values, is registered on the registry via a subscription request. This structure - * is provided to concisely provide the required information. The information in this - * structure describes the data that is to be sent when the subscription "fires". It includes - * the segment upon which the data resides, the tokens that describe the containers, and - * the keys that describe the keyvals to be returned. These are combined via the - * addr_mode to locate and return the data. - */ -typedef struct { - opal_object_t super; /**< Makes this an object */ - char *name; /**< A unique name for this subscription - can be NULL */ - orte_gpr_subscription_id_t id; /**< id number of this subscription, as assigned by system */ - orte_gpr_notify_action_t action; /**< what causes subscription to fire */ - orte_std_cntr_t cnt; /**< Number of values included */ - orte_gpr_value_t **values; /**< Contiguous array of pointers to value objects - describing the data to be returned */ - orte_gpr_notify_cb_fn_t cbfunc; /**< the callback function */ - void *user_tag; /**< User-provided tag to be used in cbfunc */ -} orte_gpr_subscription_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_subscription_t); -#define ORTE_GPR_SUBSCRIPTION_EMPTY { OPAL_OBJ_STATIC_INIT(orte_gpr_subscription_t), NULL, ORTE_GPR_SUBSCRIPTION_ID_MAX, 0, 0, NULL, 0, NULL} - -/** Structure for registering triggers - * A trigger causes the associated subscriptions to be executed at a specified event, - * such as when counters reach specified values. The data provided here specifies - * which objects on the registry are to be monitored, and what conditions must - * exist between those objects for the trigger to be "fired". 
- */ -typedef struct { - opal_object_t super; /**< Makes this an object */ - char *name; /**< A unique name for this trigger - can be NULL */ - orte_gpr_trigger_id_t id; /**< id number of this trigger, as assigned by system */ - orte_gpr_trigger_action_t action; /**< trigger characteristics */ - orte_std_cntr_t cnt; /**< Number of values included */ - orte_gpr_value_t **values; /**< Contiguous array of pointers to value objects - describing the objects to be monitored */ - orte_gpr_trigger_cb_fn_t cbfunc; /**< the callback function */ - void *user_tag; /**< User-provided tag to be used in cbfunc */ -} orte_gpr_trigger_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_trigger_t); -#define ORTE_GPR_TRIGGER_EMPTY { OPAL_OBJ_STATIC_INIT(orte_gpr_trigger_t), NULL, ORTE_GPR_TRIGGER_ID_MAX, 0, 0, NULL, 0, NULL} - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* GPR_TYPES_H */ diff --git a/orte/mca/gpr/null/configure.m4 b/orte/mca/gpr/null/configure.m4 deleted file mode 100644 index 7c743e6801..0000000000 --- a/orte/mca/gpr/null/configure.m4 +++ /dev/null @@ -1,15 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2007 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_gpr_null_CONFIG(action-if-can-compile, -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_gpr_null_CONFIG],[$1]) diff --git a/orte/mca/gpr/null/gpr_null.c b/orte/mca/gpr/null/gpr_null.c deleted file mode 100644 index dabe5daa8a..0000000000 --- a/orte/mca/gpr/null/gpr_null.c +++ /dev/null @@ -1,433 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "orte/mca/gpr/gpr.h" -#include "orte/orte_constants.h" -#include "opal/util/output.h" -#include "orte/mca/gpr/base/base.h" -#include "orte/dss/dss_types.h" - - -static int -orte_gpr_null_module_init(void) -{ - return ORTE_SUCCESS; -} - - -static int -orte_gpr_null_begin_compound_cmd(orte_buffer_t *buffer) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_stop_compound_cmd(void) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_exec_compound_cmd(orte_buffer_t *buffer) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_process_compound_cmd(orte_buffer_t *buffer, - orte_process_name_t *name) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_cleanup_job(orte_jobid_t jobid) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_cleanup_proc(orte_process_name_t * proc) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_preallocate_segment(char *name, orte_std_cntr_t num_slots) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_get_number_entries(orte_std_cntr_t *n, char *segment, char **tokens) -{ - *n = 0; - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_delete_segment(char *segment) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_delete_segment_nb(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_put(orte_std_cntr_t cnt, orte_gpr_value_t ** values) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_put_nb(orte_std_cntr_t cnt, orte_gpr_value_t ** values, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_get(orte_gpr_addr_mode_t 
addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t * cnt, orte_gpr_value_t *** values) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int -orte_gpr_null_get_conditional(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t num_conditions, orte_gpr_keyval_t **conditions, - orte_std_cntr_t * cnt, orte_gpr_value_t *** values) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int -orte_gpr_null_get_nb(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int -orte_gpr_null_delete_entries(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - char **keys) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_delete_entries_nb(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - char **keys, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_index(char *segment, orte_std_cntr_t * cnt, char ***index) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_index_nb(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_subscribe(orte_std_cntr_t num_subs, - orte_gpr_subscription_t ** subscriptions, - orte_std_cntr_t num_trigs, - orte_gpr_trigger_t ** triggers) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_unsubscribe(orte_gpr_subscription_id_t sub_number) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_cancel_trigger(orte_gpr_trigger_id_t trig_number) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_all(void) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_segments(char *segment) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_triggers(orte_gpr_trigger_id_t start) -{ - return ORTE_SUCCESS; -} - -static int 
-orte_gpr_null_dump_subscriptions(orte_gpr_subscription_id_t start) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_local_triggers(void) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_local_subscriptions(void) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_callbacks(void) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_notify_msg(orte_gpr_notify_message_t * msg) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_notify_data(orte_gpr_notify_data_t * data) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_value(orte_gpr_value_t * value) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_dump_segment_size(char *segment) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_increment_value(orte_gpr_value_t * value) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_decrement_value(orte_gpr_value_t * value) -{ - return ORTE_SUCCESS; -} - -static int -orte_gpr_null_arith(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_dss_arith_op_t operation, - orte_data_value_t *value) -{ - return ORTE_SUCCESS; -} - -static int orte_gpr_null_put_1(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - char *key, orte_data_value_t* value) -{ - return ORTE_SUCCESS; -} - - -static int orte_gpr_null_put_N(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, - orte_std_cntr_t n, char **keys, - orte_data_value_t **data_values) -{ - return ORTE_SUCCESS; -} - -static int orte_gpr_null_subscribe_1(orte_gpr_subscription_id_t *id, - char *trig_name, - char *sub_name, - orte_gpr_notify_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - char *key, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag) -{ - return ORTE_SUCCESS; -} - - -static int orte_gpr_null_subscribe_N(orte_gpr_subscription_id_t *id, - char *trig_name, - char *sub_name, - orte_gpr_notify_action_t action, - orte_gpr_addr_mode_t addr_mode, 
- char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_gpr_notify_cb_fn_t cbfunc, - void *user_tag) -{ - return ORTE_SUCCESS; -} - - -static int orte_gpr_null_define_trigger(orte_gpr_trigger_id_t *id, - char *trig_name, - orte_gpr_trigger_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_gpr_trigger_cb_fn_t cbfunc, - void *user_tag) -{ - return ORTE_SUCCESS; -} - -static int orte_gpr_null_define_trigger_level(orte_gpr_trigger_id_t *id, - char *trig_name, - orte_gpr_trigger_action_t action, - orte_gpr_addr_mode_t addr_mode, - char *segment, - char **tokens, - orte_std_cntr_t n, - char **keys, - orte_std_cntr_t *levels, - orte_gpr_trigger_cb_fn_t cbfunc, - void *user_tag) -{ - return ORTE_SUCCESS; -} - -static int orte_gpr_null_deliver_notify_msg(orte_gpr_notify_message_t *msg) -{ - return ORTE_SUCCESS; -} - -static int orte_gpr_null_dump_a_trigger( - char *name, - orte_gpr_trigger_id_t id) -{ - return ORTE_SUCCESS; -} - -static int orte_gpr_null_dump_a_subscription( - char *name, - orte_gpr_subscription_id_t id) -{ - return ORTE_SUCCESS; -} -/* - * setup the function pointers for the module - */ -orte_gpr_base_module_t orte_gpr_null_module = { - /* INIT */ - orte_gpr_null_module_init, - /* BLOCKING OPERATIONS */ - orte_gpr_null_get, - orte_gpr_null_get_conditional, - orte_gpr_null_put, - orte_gpr_null_put_1, - orte_gpr_null_put_N, - orte_gpr_null_delete_entries, - orte_gpr_null_delete_segment, - orte_gpr_null_index, - /* NON-BLOCKING OPERATIONS */ - orte_gpr_null_get_nb, - orte_gpr_null_put_nb, - orte_gpr_null_delete_entries_nb, - orte_gpr_null_delete_segment_nb, - orte_gpr_null_index_nb, - /* GENERAL OPERATIONS */ - orte_gpr_base_create_value, - orte_gpr_base_create_keyval, - orte_gpr_null_preallocate_segment, - orte_gpr_null_get_number_entries, - orte_gpr_null_deliver_notify_msg, - /* ARITHMETIC OPERATIONS */ - orte_gpr_null_arith, - orte_gpr_null_increment_value, - 
orte_gpr_null_decrement_value, - /* SUBSCRIBE OPERATIONS */ - orte_gpr_null_subscribe, - orte_gpr_null_subscribe_1, - orte_gpr_null_subscribe_N, - orte_gpr_null_define_trigger, - orte_gpr_null_define_trigger_level, - orte_gpr_null_unsubscribe, - orte_gpr_null_cancel_trigger, - /* COMPOUND COMMANDS */ - orte_gpr_null_begin_compound_cmd, - orte_gpr_null_stop_compound_cmd, - orte_gpr_null_exec_compound_cmd, - orte_gpr_null_process_compound_cmd, - /* DIAGNOSTIC OPERATIONS */ - orte_gpr_null_dump_all, - orte_gpr_null_dump_segments, - orte_gpr_null_dump_triggers, - orte_gpr_null_dump_subscriptions, - orte_gpr_null_dump_a_trigger, - orte_gpr_null_dump_a_subscription, - orte_gpr_null_dump_local_triggers, - orte_gpr_null_dump_local_subscriptions, - orte_gpr_null_dump_callbacks, - orte_gpr_null_dump_notify_msg, - orte_gpr_null_dump_notify_data, - orte_gpr_null_dump_value, - orte_gpr_null_dump_segment_size, - /* CLEANUP OPERATIONS */ - orte_gpr_null_cleanup_job, - orte_gpr_null_cleanup_proc -}; diff --git a/orte/mca/gpr/null/gpr_null_component.c b/orte/mca/gpr/null/gpr_null_component.c deleted file mode 100644 index bbad10d7e5..0000000000 --- a/orte/mca/gpr/null/gpr_null_component.c +++ /dev/null @@ -1,73 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "orte/mca/gpr/gpr.h" -#include "orte/orte_constants.h" -#include "orte/mca/gpr/null/gpr_null.h" - -extern orte_gpr_base_module_t orte_gpr_null_module; - -static int -orte_gpr_null_open(void) -{ - return ORTE_SUCCESS; -} - - -static int -orte_gpr_null_close(void) -{ - return ORTE_SUCCESS; -} - -static orte_gpr_base_module_t * -orte_gpr_null_init(bool *allow_multi_user_threads, - bool *have_hidden_threads, - int *priority) -{ - *priority = 0; - return &orte_gpr_null_module; -} - -static int -orte_gpr_null_finalize(void) -{ - return ORTE_SUCCESS; -} - -mca_gpr_base_component_t mca_gpr_null_component = { - { - MCA_GPR_BASE_VERSION_1_0_0, - - "null", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_gpr_null_open, /* module open */ - orte_gpr_null_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_gpr_null_init, /* module init */ - orte_gpr_null_finalize /* module shutdown */ -}; diff --git a/orte/mca/gpr/proxy/Makefile.am b/orte/mca/gpr/proxy/Makefile.am deleted file mode 100644 index 2850d593e1..0000000000 --- a/orte/mca/gpr/proxy/Makefile.am +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - gpr_proxy_arithmetic_ops.c \ - gpr_proxy_cleanup.c \ - gpr_proxy_component.c \ - gpr_proxy_compound_cmd.c \ - gpr_proxy_del_index.c \ - gpr_proxy_deliver_notify_msg.c \ - gpr_proxy_dump.c \ - gpr_proxy_dump_local_trigs_subs.c \ - gpr_proxy_general_operations.c \ - gpr_proxy_internals.c \ - gpr_proxy_put_get.c \ - gpr_proxy_subscribe.c \ - gpr_proxy.h - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if OMPI_BUILD_gpr_proxy_DSO -component_noinst = -component_install = mca_gpr_proxy.la -else -component_noinst = libmca_gpr_proxy.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_gpr_proxy_la_SOURCES = $(sources) -mca_gpr_proxy_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_gpr_proxy_la_SOURCES =$(sources) -libmca_gpr_proxy_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/gpr/proxy/configure.m4 b/orte/mca/gpr/proxy/configure.m4 deleted file mode 100644 index 09788793dc..0000000000 --- a/orte/mca/gpr/proxy/configure.m4 +++ /dev/null @@ -1,15 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2007 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_gpr_proxy_CONFIG(action-if-can-compile, -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_gpr_proxy_CONFIG],[$1]) diff --git a/orte/mca/gpr/proxy/gpr_proxy.h b/orte/mca/gpr/proxy/gpr_proxy.h deleted file mode 100644 index 2d27a346a6..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy.h +++ /dev/null @@ -1,268 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef ORTE_GPR_PROXY_H -#define ORTE_GPR_PROXY_H - - -#include "orte_config.h" - -#include "orte/orte_types.h" -#include "opal/class/opal_object.h" -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" -#include "orte/class/orte_pointer_array.h" -#include "orte/dss/dss_types.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/gpr/base/base.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_gpr_proxy_open(void); -int orte_gpr_proxy_close(void); - - -/* - * Startup / Shutdown - */ -orte_gpr_base_module_t* -orte_gpr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); -int orte_gpr_proxy_module_init(void); - -int orte_gpr_proxy_finalize(void); - -/* - * proxy-local types - */ -typedef struct { - opal_object_t super; /**< Allows this to be an object */ - orte_gpr_subscription_id_t id; /**< id of this subscription */ - orte_std_cntr_t index; /**< location of this subscription in array */ - char *name; - orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notificaiton */ - void *user_tag; /**< User-provided tag for callback function */ -} orte_gpr_proxy_subscriber_t; - -OBJ_CLASS_DECLARATION(orte_gpr_proxy_subscriber_t); - - -typedef struct { - opal_object_t super; /**< Allows this to be an object */ - orte_gpr_trigger_id_t id; /**< id of this trigger */ - orte_std_cntr_t index; /**< location of this trigger in array */ - char *name; - orte_gpr_trigger_cb_fn_t callback; /**< Function to 
be called for notification */ - void *user_tag; /**< User-provided tag for callback function */ -} orte_gpr_proxy_trigger_t; - -OBJ_CLASS_DECLARATION(orte_gpr_proxy_trigger_t); - - -/* - * globals used within proxy component - */ -typedef struct { - int debug; - orte_gpr_subscription_id_t num_subs; - orte_pointer_array_t *subscriptions; - orte_gpr_trigger_id_t num_trigs; - orte_pointer_array_t *triggers; - opal_mutex_t mutex; - bool compound_cmd_mode; - orte_buffer_t *compound_cmd; - opal_mutex_t wait_for_compound_mutex; - opal_condition_t compound_cmd_condition; - int compound_cmd_waiting; - bool timing; -} orte_gpr_proxy_globals_t; - -extern orte_gpr_proxy_globals_t orte_gpr_proxy_globals; - -/* - * Compound cmd functions - */ -int orte_gpr_proxy_begin_compound_cmd(orte_buffer_t *buffer); - -int orte_gpr_proxy_stop_compound_cmd(void); - -int orte_gpr_proxy_exec_compound_cmd(orte_buffer_t *buffer); - -int orte_gpr_proxy_process_compound_cmd(orte_buffer_t *buffer, - orte_process_name_t *name); - -/* - * Arithmetic operations - */ -int orte_gpr_proxy_arith(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_dss_arith_op_t operation, - orte_data_value_t *value); - -int orte_gpr_proxy_increment_value(orte_gpr_value_t *value); - -int orte_gpr_proxy_decrement_value(orte_gpr_value_t *value); - -/* - * Delete-index functions - */ -int orte_gpr_proxy_delete_segment(char *segment); - -int orte_gpr_proxy_delete_segment_nb(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - -int orte_gpr_proxy_delete_entries(orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys); - -int orte_gpr_proxy_delete_entries_nb( - orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - -int orte_gpr_proxy_index(char *segment, orte_std_cntr_t *cnt, char ***index); - -int orte_gpr_proxy_index_nb(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - - 
-/* - * Cleanup functions - */ -int orte_gpr_proxy_cleanup_job(orte_jobid_t jobid); - -int orte_gpr_proxy_cleanup_proc(orte_process_name_t *proc); - - -/* - * Put-get functions - */ -int orte_gpr_proxy_put(orte_std_cntr_t cnt, orte_gpr_value_t **values); - -int orte_gpr_proxy_put_nb(orte_std_cntr_t cnt, orte_gpr_value_t **values, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - -int orte_gpr_proxy_get(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values); - -int orte_gpr_proxy_get_conditional(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t num_conditions, orte_gpr_keyval_t **conditions, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values); - -int orte_gpr_proxy_get_nb(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - - -/* - * Subscribe functions - */ -int orte_gpr_proxy_subscribe(orte_std_cntr_t num_subs, - orte_gpr_subscription_t **subscriptions, - orte_std_cntr_t num_trigs, - orte_gpr_trigger_t **trigs); - -int orte_gpr_proxy_unsubscribe(orte_gpr_subscription_id_t sub_number); - -int orte_gpr_proxy_cancel_trigger(orte_gpr_trigger_id_t trig); - - -/* - * Diagnostic functions - */ -int orte_gpr_proxy_dump_all(void); - -int orte_gpr_proxy_dump_segments(char *segment); - -int orte_gpr_proxy_dump_triggers(orte_gpr_trigger_id_t start); - -int orte_gpr_proxy_dump_subscriptions(orte_gpr_subscription_id_t start); - -int orte_gpr_proxy_dump_a_trigger(char *name, - orte_gpr_trigger_id_t id); - -int orte_gpr_proxy_dump_a_subscription(char *name, - orte_gpr_subscription_id_t id); - -int orte_gpr_proxy_dump_local_triggers(void); - -int orte_gpr_proxy_dump_local_subscriptions(void); - -int orte_gpr_proxy_dump_callbacks(void); - -int orte_gpr_proxy_dump_notify_msg(orte_gpr_notify_message_t *msg); - -int orte_gpr_proxy_dump_notify_data(orte_gpr_notify_data_t *data); - -int 
orte_gpr_proxy_dump_value(orte_gpr_value_t *value); - -int orte_gpr_proxy_dump_segment_size(char *segment); - -/* - * General operations - */ -int orte_gpr_proxy_preallocate_segment(char *name, orte_std_cntr_t num_slots); - -int orte_gpr_proxy_get_number_entries(orte_std_cntr_t *n, char *segment, char **tokens); - -int orte_gpr_proxy_deliver_notify_msg(orte_gpr_notify_message_t *msg); - -/* - * Functions that interface to the replica - */ -void orte_gpr_proxy_notify_recv(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, orte_rml_tag_t tag, - void* cbdata); - - -int orte_gpr_proxy_ft_event(int state); - -/* - * Internal functions - */ - -int -orte_gpr_proxy_enter_subscription(orte_std_cntr_t cnt, orte_gpr_subscription_t **subscriptions); - -int -orte_gpr_proxy_remove_subscription(orte_gpr_proxy_subscriber_t *sub); - -int -orte_gpr_proxy_enter_trigger(orte_std_cntr_t cnt, orte_gpr_trigger_t **triggers); - - -int -orte_gpr_proxy_remove_trigger(orte_gpr_proxy_trigger_t *trig); - -/* - * - */ -ORTE_MODULE_DECLSPEC extern mca_gpr_base_component_t mca_gpr_proxy_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/gpr/proxy/gpr_proxy_arithmetic_ops.c b/orte/mca/gpr/proxy/gpr_proxy_arithmetic_ops.c deleted file mode 100644 index 6f28ce024a..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_arithmetic_ops.c +++ /dev/null @@ -1,220 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/dss/dss_types.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" - -#include "gpr_proxy.h" - - -int orte_gpr_proxy_arith(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_dss_arith_op_t operation, - orte_data_value_t *value) -{ - orte_buffer_t *cmd, *answer; - int rc, ret; - - OPAL_TRACE(1); - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_arith(orte_gpr_proxy_globals.compound_cmd, - addr_mode, segment, tokens, - keys, operation, value))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_arith(cmd, addr_mode, segment, tokens, - keys, operation, value))) { - OBJ_RELEASE(cmd); - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_arith(answer, &ret))) { - ORTE_ERROR_LOG(rc); - 
OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - - return ret; -} - -int orte_gpr_proxy_increment_value(orte_gpr_value_t *value) -{ - orte_buffer_t *cmd, *answer; - int rc, ret; - - OPAL_TRACE(1); - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_increment_value( - orte_gpr_proxy_globals.compound_cmd, - value))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_increment_value(cmd, value))) { - OBJ_RELEASE(cmd); - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_increment_value(answer, &ret))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - - return ret; -} - -int orte_gpr_proxy_decrement_value(orte_gpr_value_t *value) -{ - orte_buffer_t *cmd, *answer; - int rc, ret; - - OPAL_TRACE(1); - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_decrement_value( - orte_gpr_proxy_globals.compound_cmd, - value))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_decrement_value(cmd, 
value))) { - OBJ_RELEASE(cmd); - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_decrement_value(answer, &ret))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - - return ret; -} diff --git a/orte/mca/gpr/proxy/gpr_proxy_cleanup.c b/orte/mca/gpr/proxy/gpr_proxy_cleanup.c deleted file mode 100644 index bbd4e05338..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_cleanup.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "opal/util/trace.h" -#include "orte/dss/dss_types.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" - -#include "gpr_proxy.h" - - -int orte_gpr_proxy_cleanup_job(orte_jobid_t jobid) -{ - orte_buffer_t *cmd, *answer; - int rc, ret; - - OPAL_TRACE(1); - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_cleanup_job(orte_gpr_proxy_globals.compound_cmd, jobid))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_cleanup_job(cmd, jobid))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_cleanup_job(answer, &ret))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - - return ret; - -} - - -int orte_gpr_proxy_cleanup_proc(orte_process_name_t *proc) -{ - orte_buffer_t *cmd, *answer; - int rc, ret; - - OPAL_TRACE(1); - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_cleanup_proc(orte_gpr_proxy_globals.compound_cmd, 
proc))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_cleanup_proc(cmd, proc))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_cleanup_proc(answer, &ret))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - - return ret; -} - -int orte_gpr_proxy_ft_event(int state) { - - if(OPAL_CRS_CHECKPOINT == state) { - /* - * Quiet the GPR globally. (NS ?) - * Here we want to make sure there are no pending notifications in the - * HNP GPR. So make sure everything is settled in the HNP. - */ - /* Check all of the subscriptions */ - /* orte_gpr_proxy_globals.subscriptions */ - - /* Check all of the triggers */ - /* orte_gpr_proxy_globals.triggers */ - - /* Stop the RML recv... 
*/ - /* orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_GPR_NOTIFY); */ - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/gpr/proxy/gpr_proxy_component.c b/orte/mca/gpr/proxy/gpr_proxy_component.c deleted file mode 100644 index 090e27c97f..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_component.c +++ /dev/null @@ -1,423 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "opal/util/output.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/ns/ns.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "gpr_proxy.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_gpr_base_component_t mca_gpr_proxy_component = { - { - MCA_GPR_BASE_VERSION_1_0_0, - - "proxy", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_gpr_proxy_open, /* module open */ - orte_gpr_proxy_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_gpr_proxy_component_init, /* module init */ - orte_gpr_proxy_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_gpr_base_module_t orte_gpr_proxy = { - /* INIT */ - orte_gpr_proxy_module_init, - /* BLOCKING OPERATIONS */ - orte_gpr_proxy_get, - orte_gpr_proxy_get_conditional, - orte_gpr_proxy_put, - orte_gpr_base_put_1, - orte_gpr_base_put_N, - orte_gpr_proxy_delete_entries, - orte_gpr_proxy_delete_segment, - orte_gpr_proxy_index, - /* NON-BLOCKING OPERATIONS */ - orte_gpr_proxy_get_nb, - orte_gpr_proxy_put_nb, - orte_gpr_proxy_delete_entries_nb, - orte_gpr_proxy_delete_segment_nb, - orte_gpr_proxy_index_nb, - /* GENERAL OPERATIONS */ - orte_gpr_base_create_value, - orte_gpr_base_create_keyval, - orte_gpr_proxy_preallocate_segment, - orte_gpr_proxy_get_number_entries, - 
orte_gpr_proxy_deliver_notify_msg, - /* ARITHMETIC OPERATIONS */ - orte_gpr_proxy_arith, - orte_gpr_proxy_increment_value, - orte_gpr_proxy_decrement_value, - /* SUBSCRIBE OPERATIONS */ - orte_gpr_proxy_subscribe, - orte_gpr_base_subscribe_1, - orte_gpr_base_subscribe_N, - orte_gpr_base_define_trigger, - orte_gpr_base_define_trigger_level, - orte_gpr_proxy_unsubscribe, - orte_gpr_proxy_cancel_trigger, - /* COMPOUND COMMANDS */ - orte_gpr_proxy_begin_compound_cmd, - orte_gpr_proxy_stop_compound_cmd, - orte_gpr_proxy_exec_compound_cmd, - orte_gpr_proxy_process_compound_cmd, - /* DIAGNOSTIC OPERATIONS */ - orte_gpr_proxy_dump_all, - orte_gpr_proxy_dump_segments, - orte_gpr_proxy_dump_triggers, - orte_gpr_proxy_dump_subscriptions, - orte_gpr_proxy_dump_a_trigger, - orte_gpr_proxy_dump_a_subscription, - orte_gpr_proxy_dump_local_triggers, - orte_gpr_proxy_dump_local_subscriptions, - orte_gpr_proxy_dump_callbacks, - orte_gpr_proxy_dump_notify_msg, - orte_gpr_proxy_dump_notify_data, - orte_gpr_proxy_dump_value, - orte_gpr_proxy_dump_segment_size, - /* CLEANUP OPERATIONS */ - orte_gpr_proxy_cleanup_job, - orte_gpr_proxy_cleanup_proc, - orte_gpr_proxy_ft_event -}; - - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* - * globals needed within proxy component - */ -orte_gpr_proxy_globals_t orte_gpr_proxy_globals; - -/* SUBSCRIBER */ -/* constructor - used to initialize subscriber instance */ -static void orte_gpr_proxy_subscriber_construct(orte_gpr_proxy_subscriber_t* req) -{ - req->callback = NULL; - req->user_tag = NULL; - req->id = 0; - req->name = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_proxy_subscriber_destructor(orte_gpr_proxy_subscriber_t* req) -{ - if (NULL != req->name) free(req->name); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_proxy_subscriber_t, /* type name */ - opal_object_t, /* parent "class" name */ - 
orte_gpr_proxy_subscriber_construct, /* constructor */ - orte_gpr_proxy_subscriber_destructor); /* destructor */ - - -/* TRIGGER */ -/* constructor - used to initialize trigger instance */ -static void orte_gpr_proxy_trigger_construct(orte_gpr_proxy_trigger_t* req) -{ - req->callback = NULL; - req->user_tag = NULL; - req->id = 0; - req->name = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_proxy_trigger_destructor(orte_gpr_proxy_trigger_t* req) -{ - if (NULL != req->name) free(req->name); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_proxy_trigger_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_proxy_trigger_construct, /* constructor */ - orte_gpr_proxy_trigger_destructor); /* destructor */ - - -/* - * Open the component - */ -int orte_gpr_proxy_open(void) -{ - int id, tmp; - - id = mca_base_param_register_int("gpr", "proxy", "debug", NULL, 0); - mca_base_param_lookup_int(id, &tmp); - if (tmp) { - orte_gpr_proxy_globals.debug = true; - } else { - orte_gpr_proxy_globals.debug = false; - } - - return ORTE_SUCCESS; -} - -/* - * Close the component - */ -int orte_gpr_proxy_close(void) -{ - return ORTE_SUCCESS; -} - -orte_gpr_base_module_t* -orte_gpr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - orte_process_name_t name; - int ret, value; - - if (orte_gpr_proxy_globals.debug) { - opal_output(0, "gpr_proxy_init called"); - } - - /* If we are NOT to host a replica, then we want to be selected, so do all - the setup and return the module */ - if (NULL != orte_process_info.gpr_replica_uri) { - - if (orte_gpr_proxy_globals.debug) { - opal_output(0, "%s gpr_proxy_init: proxy selected", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - /* setup the replica location */ - if(ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.gpr_replica_uri, &name, NULL))) { - ORTE_ERROR_LOG(ret); - return NULL; - } - 
if(ORTE_SUCCESS != (ret = orte_dss.copy((void**)&orte_process_info.gpr_replica, &name, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - return NULL; - } - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other ns components). If - we're not the seed, then we don't want to be selected, so - return NULL. */ - - *priority = 10; - - /* We allow multi user threads but don't have any hidden threads */ - - *allow_multi_user_threads = true; - *have_hidden_threads = false; - - /* setup thread locks and condition variable */ - OBJ_CONSTRUCT(&orte_gpr_proxy_globals.mutex, opal_mutex_t); - OBJ_CONSTRUCT(&orte_gpr_proxy_globals.wait_for_compound_mutex, opal_mutex_t); - OBJ_CONSTRUCT(&orte_gpr_proxy_globals.compound_cmd_condition, opal_condition_t); - - /* initialize the registry compound mode */ - orte_gpr_proxy_globals.compound_cmd_mode = false; - orte_gpr_proxy_globals.compound_cmd_waiting = 0; - orte_gpr_proxy_globals.compound_cmd = NULL; - - /* initialize the subscription tracker */ - if (ORTE_SUCCESS != (ret = orte_pointer_array_init(&(orte_gpr_proxy_globals.subscriptions), - (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size))) { - ORTE_ERROR_LOG(ret); - return NULL; - } - orte_gpr_proxy_globals.num_subs = 0; - - /* initialize the trigger counter */ - if (ORTE_SUCCESS != (ret = orte_pointer_array_init(&(orte_gpr_proxy_globals.triggers), - (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size))) { - ORTE_ERROR_LOG(ret); - return NULL; - } - orte_gpr_proxy_globals.num_trigs = 0; - - /* check to see if we want timing information */ - mca_base_param_reg_int_name("orte", "timing", - "Request that critical timing loops be measured", - false, false, 0, &value); - if (value != 0) { - orte_gpr_proxy_globals.timing = true; - } - - initialized = true; - return &orte_gpr_proxy; - } else 
{ - return NULL; - } -} - -int orte_gpr_proxy_module_init(void) -{ - /* issue the non-blocking receive */ - int rc; - rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_GPR_NOTIFY, ORTE_RML_PERSISTENT, orte_gpr_proxy_notify_recv, NULL); - if(rc < 0) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; -} - - -/* - * finalize routine - */ -int orte_gpr_proxy_finalize(void) -{ - orte_std_cntr_t i; - orte_gpr_subscription_id_t j; - orte_gpr_trigger_id_t k; - orte_gpr_proxy_subscriber_t **lsubs; - orte_gpr_proxy_trigger_t **ltrigs; - - if (orte_gpr_proxy_globals.debug) { - opal_output(0, "%s gpr_proxy_finalize called", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - if (initialized) { - /* destruct the mutex and condition variables */ - OBJ_DESTRUCT(&orte_gpr_proxy_globals.mutex); - OBJ_DESTRUCT(&orte_gpr_proxy_globals.wait_for_compound_mutex); - OBJ_DESTRUCT(&orte_gpr_proxy_globals.compound_cmd_condition); - - /* clear the local subscriptions and triggers */ - if (NULL != orte_gpr_proxy_globals.subscriptions) { - lsubs = (orte_gpr_proxy_subscriber_t**)(orte_gpr_proxy_globals.subscriptions)->addr; - for (i=0, j=0; j < orte_gpr_proxy_globals.num_subs && - i < (orte_gpr_proxy_globals.subscriptions)->size; i++) { - if (NULL != lsubs[i]) { - j++; - OBJ_RELEASE(lsubs[i]); - } - } - OBJ_RELEASE(orte_gpr_proxy_globals.subscriptions); - } - - ltrigs = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr; - if (NULL != orte_gpr_proxy_globals.triggers) { - for (i=0, k=0; k < orte_gpr_proxy_globals.num_trigs && - i < (orte_gpr_proxy_globals.triggers)->size; i++) { - if (NULL != ltrigs[i]) { - k++; - OBJ_RELEASE(ltrigs[i]); - } - } - OBJ_RELEASE(orte_gpr_proxy_globals.triggers); - } - - initialized = false; - } - - /* All done */ - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_GPR_NOTIFY); - return ORTE_SUCCESS; -} - -/* - * handle notify messages from replicas - */ - -void orte_gpr_proxy_notify_recv(int status, 
orte_process_name_t* sender, - orte_buffer_t *buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_gpr_cmd_flag_t command; - orte_gpr_notify_message_t *msg; - orte_std_cntr_t n; - int rc; - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_GPR_NOTIFY_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - - msg = OBJ_NEW(orte_gpr_notify_message_t); - if (NULL == msg) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - goto RETURN_ERROR; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &msg, &n, ORTE_GPR_NOTIFY_MSG))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(msg); - goto RETURN_ERROR; - } - - /* process the message */ - if (ORTE_SUCCESS != (rc = orte_gpr_proxy_deliver_notify_msg(msg))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(msg); - goto RETURN_ERROR; - } - - /* release data */ - OBJ_RELEASE(msg); - - -RETURN_ERROR: - return; -} - diff --git a/orte/mca/gpr/proxy/gpr_proxy_compound_cmd.c b/orte/mca/gpr/proxy/gpr_proxy_compound_cmd.c deleted file mode 100644 index f0877db684..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_compound_cmd.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "opal/util/output.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" - -#include "gpr_proxy.h" - - -int orte_gpr_proxy_begin_compound_cmd(orte_buffer_t *buffer) -{ - orte_gpr_cmd_flag_t command; - int rc; - - command = ORTE_GPR_COMPOUND_CMD; - - OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.wait_for_compound_mutex); - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - orte_gpr_proxy_globals.compound_cmd_waiting++; - opal_condition_wait(&orte_gpr_proxy_globals.compound_cmd_condition, &orte_gpr_proxy_globals.wait_for_compound_mutex); - orte_gpr_proxy_globals.compound_cmd_waiting--; - } - - orte_gpr_proxy_globals.compound_cmd_mode = true; - orte_gpr_proxy_globals.compound_cmd = buffer; - - if (ORTE_SUCCESS != (rc = orte_dss.pack(orte_gpr_proxy_globals.compound_cmd, &command, - 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - orte_gpr_proxy_globals.compound_cmd_mode = false; - return rc; - } - - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.wait_for_compound_mutex); - return ORTE_SUCCESS; -} - - -int orte_gpr_proxy_stop_compound_cmd(void) -{ - OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.wait_for_compound_mutex); - - orte_gpr_proxy_globals.compound_cmd_mode = false; - orte_gpr_proxy_globals.compound_cmd = NULL; - - if (orte_gpr_proxy_globals.compound_cmd_waiting) { - opal_condition_signal(&orte_gpr_proxy_globals.compound_cmd_condition); - } - - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.wait_for_compound_mutex); - return ORTE_SUCCESS; -} - - -int orte_gpr_proxy_exec_compound_cmd(orte_buffer_t *buffer) -{ - orte_buffer_t *answer; - orte_gpr_cmd_flag_t command; - orte_std_cntr_t n; - int rc, response; - - if (orte_gpr_proxy_globals.debug) { - opal_output(0, "[%ld,%ld] transmitting compound command", 
- ORTE_NAME_ARGS(orte_process_info.my_name)); - } - - rc = ORTE_SUCCESS; - - OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.wait_for_compound_mutex); - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, orte_gpr_proxy_globals.compound_cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - rc = ORTE_ERR_COMM_FAILURE; - goto CLEANUP; - } - orte_gpr_proxy_globals.compound_cmd_mode = false; - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - rc = ORTE_ERR_COMM_FAILURE; - goto CLEANUP; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - goto CLEANUP; - } - - if (ORTE_GPR_COMPOUND_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - rc = ORTE_ERR_COMM_FAILURE; - goto CLEANUP; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &response, &n, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - } - OBJ_RELEASE(answer); /* done with this */ - - if (ORTE_SUCCESS == rc) { - rc = (int)response; - } - - CLEANUP: - if (orte_gpr_proxy_globals.compound_cmd_waiting) { - opal_condition_signal(&orte_gpr_proxy_globals.compound_cmd_condition); - } - - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.wait_for_compound_mutex); - - return rc; -} - -int orte_gpr_proxy_process_compound_cmd(orte_buffer_t *buffer, - orte_process_name_t *name) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - diff --git a/orte/mca/gpr/proxy/gpr_proxy_del_index.c b/orte/mca/gpr/proxy/gpr_proxy_del_index.c deleted file mode 100644 index c8d656f517..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_del_index.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * 
University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" - -#include "gpr_proxy.h" - -/** - * globals - */ - -/* - * Implemented registry functions - */ - - -int orte_gpr_proxy_delete_segment(char *segment) -{ - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc, ret; - - OPAL_TRACE(1); - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - return orte_gpr_base_pack_delete_segment(orte_gpr_proxy_globals.compound_cmd, segment); - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_delete_segment(cmd, segment))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - 
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_delete_segment(answer, &ret))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - - return ret; -} - - -int orte_gpr_proxy_delete_segment_nb(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - - -int orte_gpr_proxy_delete_entries(orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys) -{ - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc, ret; - - OPAL_TRACE(1); - - /* need to protect against errors */ - if (NULL == segment) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_delete_entries(orte_gpr_proxy_globals.compound_cmd, - mode, segment, tokens, keys))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_delete_entries(cmd, - mode, segment, tokens, keys))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_delete_entries(answer, &ret))) { - 
ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - - return ret; -} - - -int orte_gpr_proxy_delete_entries_nb( - orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - - -int orte_gpr_proxy_index(char *segment, orte_std_cntr_t *cnt, char ***index) -{ - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc, ret; - - OPAL_TRACE(1); - - if (NULL == index || NULL == cnt) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - *cnt = 0; - *index = NULL; - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_index(orte_gpr_proxy_globals.compound_cmd, segment))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_index(cmd, segment))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_index(answer, &ret, cnt, index))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - - return ret; -} - -int orte_gpr_proxy_index_nb(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - 
OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} diff --git a/orte/mca/gpr/proxy/gpr_proxy_deliver_notify_msg.c b/orte/mca/gpr/proxy/gpr_proxy_deliver_notify_msg.c deleted file mode 100755 index 160a25cb77..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_deliver_notify_msg.c +++ /dev/null @@ -1,161 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/dss/dss.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/ns/ns.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "gpr_proxy.h" - - -int orte_gpr_proxy_deliver_notify_msg(orte_gpr_notify_message_t *msg) -{ - orte_gpr_notify_data_t **data; - orte_gpr_proxy_subscriber_t **subs, *sub; - orte_gpr_proxy_trigger_t **trigs; - orte_std_cntr_t i, j, k, n; - int rc; - - OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex); - - OPAL_TRACE(1); - - /* we first have to check if the message is a trigger message - if so, - * then the message is intended to be - * sent as a single block to that trigger's callback function. 
- */ - if (ORTE_GPR_TRIGGER_MSG == msg->msg_type) { - trigs = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr; - for (i=0, j=0; j < orte_gpr_proxy_globals.num_trigs && - i < (orte_gpr_proxy_globals.triggers)->size; i++) { - if (NULL != trigs[i]){ - j++; - if (msg->id == trigs[i]->id) { - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - trigs[i]->callback(msg); - OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex); - rc = ORTE_SUCCESS; - if (msg->remove) { - /* remove the specified trigger from the local tracker */ - if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(trigs[i]))) { - ORTE_ERROR_LOG(rc); - } - } - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - } - } - - /* must not have been found - report error */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_NOT_FOUND; - } - - - /* get here if this wasn't a trigger message. Only other allowed message type - * is a subscription message - if that isn't the case, then we have corrupt - * data, so flag it and return - */ - if (ORTE_GPR_SUBSCRIPTION_MSG != msg->msg_type) { - ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_GPR_DATA_CORRUPT; - } - - /* get here if we have a subscription message - i.e., the message should - * be broken into its component parts and delivered separately - * to the indicated subscribers - */ - data = (orte_gpr_notify_data_t**)(msg->data)->addr; - for (i=0, n=0; n < msg->cnt && - i < (msg->data)->size; i++) { - if (NULL != data[i]) { - n++; - if (ORTE_GPR_SUBSCRIPTION_ID_MAX != data[i]->id || NULL != data[i]->target) { - /* for each datagram in the message, we need to lookup - * the associated subscription (could be specified by name or id) to find the correct - * callback function. Name specifications are given precedence over id. 
- */ - subs = (orte_gpr_proxy_subscriber_t**) - (orte_gpr_proxy_globals.subscriptions)->addr; - sub = NULL; - for (j=0, k=0; k < orte_gpr_proxy_globals.num_subs && - j < (orte_gpr_proxy_globals.subscriptions)->size; j++) { - if (NULL != subs[j]) { - k++; - if (NULL != data[i]->target) { - /* if target name provided, must use it */ - if (NULL != subs[j]->name && - 0 == strcmp(data[i]->target, subs[j]->name)) { - sub = subs[j]; - break; - } - } else if (data[i]->id == subs[j]->id) { - /* otherwise, see if id's match */ - sub = subs[j]; - break; - } - } - } - /* get here and not found => abort */ - if (NULL == sub) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_NOT_FOUND; - } - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - sub->callback(data[i], sub->user_tag); - OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex); - - if (data[i]->remove) { - if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(sub))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - } - } - } - } - - /* all done */ - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/proxy/gpr_proxy_dump.c b/orte/mca/gpr/proxy/gpr_proxy_dump.c deleted file mode 100644 index f9bef95d37..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_dump.c +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "orte/orte_constants.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" - -#include "gpr_proxy.h" - -int orte_gpr_proxy_dump_all(void) -{ - orte_gpr_cmd_flag_t command; - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc; - orte_std_cntr_t n; - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - return orte_gpr_base_pack_dump_all(orte_gpr_proxy_globals.compound_cmd); - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_all(cmd))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_GPR_DUMP_ALL_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - -int 
orte_gpr_proxy_dump_segments(char *segment) -{ - orte_gpr_cmd_flag_t command; - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc; - orte_std_cntr_t n; - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - return orte_gpr_base_pack_dump_segments(orte_gpr_proxy_globals.compound_cmd, segment); - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_segments(cmd, segment))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_GPR_DUMP_SEGMENTS_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - -int orte_gpr_proxy_dump_triggers(orte_gpr_trigger_id_t start) -{ - orte_gpr_cmd_flag_t command; - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc; - orte_std_cntr_t n; - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - return orte_gpr_base_pack_dump_triggers(orte_gpr_proxy_globals.compound_cmd, start); - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; 
- } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_triggers(cmd, start))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_GPR_DUMP_TRIGGERS_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - -int orte_gpr_proxy_dump_subscriptions(orte_gpr_subscription_id_t start) -{ - orte_gpr_cmd_flag_t command; - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc; - orte_std_cntr_t n; - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - return orte_gpr_base_pack_dump_subscriptions(orte_gpr_proxy_globals.compound_cmd, start); - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_subscriptions(cmd, start))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return 
ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - -int orte_gpr_proxy_dump_a_trigger(char *name, - orte_gpr_trigger_id_t id) -{ - orte_gpr_cmd_flag_t command; - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc; - orte_std_cntr_t n; - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - return orte_gpr_base_pack_dump_a_trigger(orte_gpr_proxy_globals.compound_cmd, name, id); - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_a_trigger(cmd, name, id))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_GPR_DUMP_TRIGGERS_CMD != command) { - 
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - - -int orte_gpr_proxy_dump_a_subscription(char *name, - orte_gpr_subscription_id_t id) -{ - orte_gpr_cmd_flag_t command; - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc; - orte_std_cntr_t n; - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - return orte_gpr_base_pack_dump_a_subscription(orte_gpr_proxy_globals.compound_cmd, name, id); - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_a_subscription(cmd, name, id))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_GPR_DUMP_TRIGGERS_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - - -int orte_gpr_proxy_dump_callbacks(void) -{ - orte_gpr_cmd_flag_t command; - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc; - orte_std_cntr_t n; - - if 
(orte_gpr_proxy_globals.compound_cmd_mode) { - return orte_gpr_base_pack_dump_callbacks(orte_gpr_proxy_globals.compound_cmd); - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_callbacks(cmd))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_GPR_DUMP_CALLBACKS_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - -int orte_gpr_proxy_dump_notify_msg(orte_gpr_notify_message_t *msg) -{ - orte_buffer_t *answer; - int rc; - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_dump_notify_msg(answer, msg))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - - -int orte_gpr_proxy_dump_notify_data(orte_gpr_notify_data_t *data) -{ - orte_buffer_t *answer; - int 
rc; - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_dump_notify_data(answer, data))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - -int orte_gpr_proxy_dump_value(orte_gpr_value_t *value) -{ - orte_buffer_t *answer; - int rc; - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_dump_value(answer, value))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - -int orte_gpr_proxy_dump_segment_size(char *segment) -{ - orte_gpr_cmd_flag_t command; - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc; - orte_std_cntr_t n; - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - return orte_gpr_base_pack_dump_segment_size(orte_gpr_proxy_globals.compound_cmd, segment); - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_segment_size(cmd, segment))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) 
{ - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &n, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_GPR_DUMP_SEGMENT_SIZE_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} diff --git a/orte/mca/gpr/proxy/gpr_proxy_dump_local_trigs_subs.c b/orte/mca/gpr/proxy/gpr_proxy_dump_local_trigs_subs.c deleted file mode 100644 index 3bc6412051..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_dump_local_trigs_subs.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "orte/orte_constants.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "opal/util/output.h" - -#include "gpr_proxy.h" - -int orte_gpr_proxy_dump_local_triggers(void) -{ - orte_gpr_proxy_trigger_t **trigs; - orte_std_cntr_t j, k; - - opal_output(orte_gpr_base_output, "DUMP OF LOCAL TRIGGERS for [%ld,%ld]\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); - opal_output(orte_gpr_base_output, "Number of triggers: %lu\n", (unsigned long) orte_gpr_proxy_globals.num_trigs); - - trigs = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr; - for (j=0, k=0; k < orte_gpr_proxy_globals.num_trigs && - j < (orte_gpr_proxy_globals.triggers)->size; j++) { - if (NULL != trigs[j]) { - k++; - opal_output(orte_gpr_base_output, "Data for trigger %lu", (unsigned long) trigs[j]->id); - if (NULL == trigs[j]->name) { - opal_output(orte_gpr_base_output, "\tNOT a named trigger"); - } else { - opal_output(orte_gpr_base_output, "\ttrigger name: %s", trigs[j]->name); - } - } - } - return ORTE_SUCCESS; -} - -int orte_gpr_proxy_dump_local_subscriptions(void) -{ - orte_gpr_proxy_subscriber_t **subs; - orte_std_cntr_t j, k; - - opal_output(orte_gpr_base_output, "DUMP OF LOCAL SUBSCRIPTIONS for [%ld,%ld]\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); - opal_output(orte_gpr_base_output, "Number of subscriptions: %lu\n", (unsigned long) orte_gpr_proxy_globals.num_subs); - - subs = (orte_gpr_proxy_subscriber_t**)(orte_gpr_proxy_globals.subscriptions)->addr; - for (j=0, k=0; k < orte_gpr_proxy_globals.num_subs && - j < (orte_gpr_proxy_globals.subscriptions)->size; j++) { - if (NULL != subs[j]) { - k++; - opal_output(orte_gpr_base_output, "Data for subscription %lu", (unsigned long) subs[j]->id); - if (NULL == subs[j]->name) { - opal_output(orte_gpr_base_output, "\tNOT a named subscription"); - } else { - opal_output(orte_gpr_base_output, 
"\tsubscription name: %s", subs[j]->name); - } - } - } - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/proxy/gpr_proxy_general_operations.c b/orte/mca/gpr/proxy/gpr_proxy_general_operations.c deleted file mode 100644 index 3e00ac9ebb..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_general_operations.c +++ /dev/null @@ -1,54 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - proxy component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/dss/dss_types.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" - -#include "gpr_proxy.h" - -int orte_gpr_proxy_preallocate_segment(char *name, orte_std_cntr_t num_slots) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - -int orte_gpr_proxy_get_number_entries(orte_std_cntr_t *n, char *segment, char **tokens) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} diff --git a/orte/mca/gpr/proxy/gpr_proxy_internals.c b/orte/mca/gpr/proxy/gpr_proxy_internals.c deleted file mode 100644 index 32a9dc65d5..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_internals.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research 
and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "opal/util/trace.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/class/orte_pointer_array.h" -#include "orte/mca/gpr/proxy/gpr_proxy.h" - -int -orte_gpr_proxy_enter_subscription(orte_std_cntr_t cnt, orte_gpr_subscription_t **subscriptions) -{ - orte_gpr_proxy_subscriber_t *sub; - orte_std_cntr_t i; - - OPAL_TRACE(2); - - for (i=0; i < cnt; i++) { - sub = OBJ_NEW(orte_gpr_proxy_subscriber_t); - if (NULL == sub) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (NULL != subscriptions[i]->name) { - sub->name = strdup(subscriptions[i]->name); - } - sub->callback = subscriptions[i]->cbfunc; - sub->user_tag = subscriptions[i]->user_tag; - if (0 > orte_pointer_array_add(&sub->index, orte_gpr_proxy_globals.subscriptions, sub)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - sub->id = orte_gpr_proxy_globals.num_subs; - subscriptions[i]->id = sub->id; - (orte_gpr_proxy_globals.num_subs)++; - } - - return ORTE_SUCCESS; -} - - -int -orte_gpr_proxy_enter_trigger(orte_std_cntr_t cnt, orte_gpr_trigger_t **trigs) -{ - orte_gpr_proxy_trigger_t *trig, **tptr; - orte_std_cntr_t i, j, k; - - OPAL_TRACE(2); - - for (i=0; i < cnt; i++) { - /* If the provided trigger has a name, see if it already is on - * the local trigger list. 
If so, then check to see if we - * already defined a return point for it and/or if this trigger - * doesn't - in either of those two cases, we ignore the - * trigger and just use the existing entry - */ - if (NULL != trigs[i]->name) { - tptr = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr; - for (j=0, k=0; k < orte_gpr_proxy_globals.num_trigs && - j < (orte_gpr_proxy_globals.triggers)->size; j++) { - if (NULL != tptr[j]) { - k++; - if (NULL != tptr[j]->name && - 0 == strcmp(tptr[j]->name, trigs[i]->name)) { - /* same name - trigger is already on list */ - if (NULL != tptr[j]->callback || NULL == trigs[i]->cbfunc) { - /* ignore these cases */ - trig = tptr[j]; - goto MOVEON; - } - /* reach here if either the prior trigger didn't provide - * a callback, and the new one provides one. In this - * case, we update the existing trigger callback and then - * move on - */ - tptr[j]->callback = trigs[i]->cbfunc; - trig = tptr[j]; - goto MOVEON; - } - } - } - } - - /* either the trigger doesn't have a name, OR it did, but it isn't - * already on the list - add it to the list now - */ - trig = OBJ_NEW(orte_gpr_proxy_trigger_t); - if (NULL == trig) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (NULL != trigs[i]->name) { - trig->name = strdup(trigs[i]->name); - } - /* ensure that the proper routing flag is set - * in the action field to match the trigger callback - * function - */ - if (NULL != trigs[i]->cbfunc) { - trigs[i]->action = trigs[i]->action | - ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME; - } else { - trigs[i]->action = trigs[i]->action & - ~ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME; - } - trig->callback = trigs[i]->cbfunc; - trig->user_tag = trigs[i]->user_tag; - if (0 > orte_pointer_array_add(&trig->index, orte_gpr_proxy_globals.triggers, trig)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - trig->id = orte_gpr_proxy_globals.num_trigs; - (orte_gpr_proxy_globals.num_trigs)++; 
-MOVEON: - trigs[i]->id = trig->id; - } - - return ORTE_SUCCESS; -} - - -int -orte_gpr_proxy_remove_subscription(orte_gpr_proxy_subscriber_t *sub) -{ - orte_std_cntr_t index; - - OPAL_TRACE(2); - - if (NULL == sub) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - index = sub->index; - OBJ_RELEASE(sub); - orte_pointer_array_set_item(orte_gpr_proxy_globals.subscriptions, index, NULL); - - return ORTE_SUCCESS; -} - -int -orte_gpr_proxy_remove_trigger(orte_gpr_proxy_trigger_t *trig) -{ - orte_std_cntr_t index; - - OPAL_TRACE(2); - - if (NULL == trig) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - index = trig->index; - OBJ_RELEASE(trig); - orte_pointer_array_set_item(orte_gpr_proxy_globals.triggers, index, NULL); - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/gpr/proxy/gpr_proxy_put_get.c b/orte/mca/gpr/proxy/gpr_proxy_put_get.c deleted file mode 100644 index 46903155d1..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_put_get.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/dss/dss_types.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" - -#include "gpr_proxy.h" - -int orte_gpr_proxy_put(orte_std_cntr_t cnt, orte_gpr_value_t **values) -{ - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc, ret; - - OPAL_TRACE(1); - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_put(orte_gpr_proxy_globals.compound_cmd, cnt, values))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_put(cmd, cnt, values))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_put(answer, &ret))) { - ORTE_ERROR_LOG(rc); - } - OBJ_RELEASE(answer); - - return ret; -} - -int orte_gpr_proxy_put_nb(orte_std_cntr_t cnt, orte_gpr_value_t **values, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - -int orte_gpr_proxy_get(orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t *cnt, 
orte_gpr_value_t ***values) - -{ - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc, ret; - - OPAL_TRACE(1); - - *values = NULL; - *cnt = 0; - - /* need to protect against errors */ - if (NULL == segment) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_get(orte_gpr_proxy_globals.compound_cmd, - mode, segment, tokens, keys))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_get(cmd, mode, segment, tokens, keys))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_get(answer, &ret, cnt, values))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - OBJ_RELEASE(answer); - - return ret; -} - -int orte_gpr_proxy_get_conditional(orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t num_conditions, orte_gpr_keyval_t **conditions, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values) - -{ - orte_buffer_t *cmd; - orte_buffer_t *answer; - int rc, ret; - - OPAL_TRACE(1); - - *values = NULL; - *cnt = 0; - - /* need to protect against errors */ - if (NULL == segment) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if 
(ORTE_SUCCESS != (rc = orte_gpr_base_pack_get_conditional(orte_gpr_proxy_globals.compound_cmd, - mode, segment, tokens, keys, num_conditions, conditions))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_get_conditional(cmd, mode, segment, tokens, keys, num_conditions, conditions))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_get(answer, &ret, cnt, values))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - OBJ_RELEASE(answer); - - return ret; -} - -int orte_gpr_proxy_get_nb(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} diff --git a/orte/mca/gpr/proxy/gpr_proxy_subscribe.c b/orte/mca/gpr/proxy/gpr_proxy_subscribe.c deleted file mode 100644 index de8c01a02d..0000000000 --- a/orte/mca/gpr/proxy/gpr_proxy_subscribe.c +++ /dev/null @@ -1,396 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/dss/dss.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml.h" - -#include "gpr_proxy.h" - -int -orte_gpr_proxy_subscribe(orte_std_cntr_t num_subs, - orte_gpr_subscription_t **subscriptions, - orte_std_cntr_t num_trigs, - orte_gpr_trigger_t **trigs) -{ - orte_buffer_t *cmd; - orte_buffer_t *answer; - orte_gpr_proxy_subscriber_t **subs; - int rc = ORTE_SUCCESS, ret; - orte_std_cntr_t i; - - OPAL_TRACE(1); - - /* need to protect against errors */ - if (NULL == subscriptions && NULL == trigs) { /* need at least one */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex); - - /* store callback function and user_tag in local list for lookup - * generate id_tag to send to replica to identify lookup entry - * for each subscription - */ - if (NULL != subscriptions) { - if (ORTE_SUCCESS != (rc = orte_gpr_proxy_enter_subscription( - num_subs, subscriptions))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - } - - /* if any triggers were provided, get id tags for them */ - if (NULL != trigs) { - if (ORTE_SUCCESS != (rc = orte_gpr_proxy_enter_trigger( - num_trigs, trigs))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - } - - /* check for compound cmd 
mode - if on, just pack the info into the - * compound cmd buffer and return - */ - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_subscribe(orte_gpr_proxy_globals.compound_cmd, - num_subs, subscriptions, - num_trigs, trigs))) { - ORTE_ERROR_LOG(rc); - goto subscribe_error; - } - - /* done */ - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_SUCCESS; - } - - /* if compound cmd not on, get new buffer to transmit command to replica */ - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto subscribe_error; - } - - /* pack the command and send it */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_subscribe(cmd, - num_subs, subscriptions, - num_trigs, trigs))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - goto subscribe_error; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - rc = ORTE_ERR_COMM_FAILURE; - goto subscribe_error; - } - - OBJ_RELEASE(cmd); - - /* get buffer for reply from replica and get it */ - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto subscribe_error; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - rc = ORTE_ERR_COMM_FAILURE; - goto subscribe_error; - } - - /* unpack the reply - should contain an echo of the subscribe command - * (to ensure the handshake) and the status code of the request. 
The - * unpack function checks the command for us - will return an error - * if the command was wrong - so all we have to do is record an - * error if the unpack command doesn't return "success", and return - * the resulting status code - */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_subscribe(answer, &ret))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - goto subscribe_error; - } - - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_SUCCESS; - - /* if an error was encountered during processing this request, we need to - * remove the subscriptions from the subscription tracking system. do this - * and then exit. - * NOTE: there is no corresponding function to remove triggers from the local - * trigger tracking system as nothing is stored on it - we just keep track - * of how many triggers were generated so we can identify them, and the - * numbers are NOT re-used. - */ -subscribe_error: - subs = (orte_gpr_proxy_subscriber_t**)(orte_gpr_proxy_globals.subscriptions)->addr; - for (i=0; i < num_subs; i++) { - /* find the subscription on the local tracker */ - if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(subs[subscriptions[i]->id]))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - } - - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; -} - - -int orte_gpr_proxy_unsubscribe(orte_gpr_subscription_id_t sub_number) -{ - orte_buffer_t *cmd; - orte_buffer_t *answer; - orte_gpr_proxy_subscriber_t **subs; - orte_std_cntr_t i, j; - int rc, ret; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex); - - /* remove the specified subscription from the local tracker */ - subs = (orte_gpr_proxy_subscriber_t**)(orte_gpr_proxy_globals.subscriptions)->addr; - for (i=0, j=0; j < orte_gpr_proxy_globals.num_subs && - i < (orte_gpr_proxy_globals.subscriptions)->size; i++) { - if (NULL != subs[i]){ - j++; - if (sub_number == subs[i]->id) { - if 
(ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(subs[i]))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - goto PROCESS; - } - } - } - /* must not have been found - report error */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - -PROCESS: - /* if in compound cmd mode, then just pack the command into - * that buffer and return - */ - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_unsubscribe(orte_gpr_proxy_globals.compound_cmd, - sub_number))) { - ORTE_ERROR_LOG(rc); - } - - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - - /* if not in compound cmd mode, then init a new buffer to - * transmit the command - */ - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack and transmit the command */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_unsubscribe(cmd, sub_number))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* init a buffer to receive the replica's reply */ - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - /* unpack the response. 
This function will automatically check to ensure - * that the command in the response matches the unsubscribe command, thus - * verifying the handshake. If the function returns "success", then the - * commands match and the buffer could be unpacked - all we need do, then - * is return the replica's response code - */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_unsubscribe(answer, &ret))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ret; - -} - -int orte_gpr_proxy_cancel_trigger(orte_gpr_trigger_id_t trig) -{ - orte_buffer_t *cmd; - orte_buffer_t *answer; - orte_gpr_proxy_trigger_t **trigs; - orte_std_cntr_t i, j; - int rc, ret; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex); - - /* remove the specified trigger from the local tracker */ - trigs = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr; - for (i=0, j=0; j < orte_gpr_proxy_globals.num_trigs && - i < (orte_gpr_proxy_globals.triggers)->size; i++) { - if (NULL != trigs[i]){ - j++; - if (trig == trigs[i]->id) { - if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(trigs[i]))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - goto PROCESS; - } - } - } - /* must not have been found - report error */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - -PROCESS: - /* if the compound cmd mode is on, pack the command into that buffer - * and return - */ - if (orte_gpr_proxy_globals.compound_cmd_mode) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_cancel_trigger( - orte_gpr_proxy_globals.compound_cmd, trig))) { - ORTE_ERROR_LOG(rc); - } - - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - - /* if compound cmd mode is off, init a new buffer for transmitting the - * command to the replica - */ - cmd = OBJ_NEW(orte_buffer_t); - if 
(NULL == cmd) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* pack the trigger number and transmit the command */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_cancel_trigger(cmd, trig))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - - if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - /* init a buffer to receive the replica's response */ - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - /* unpack the response. This function will automatically check to ensure - * that the command in the response matches the cancel_trigger command, thus - * verifying the handshake. 
If the function returns "success", then the - * commands match and the buffer could be unpacked - all we need do, then - * is return the replica's response code - */ - if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_unsubscribe(answer, &ret))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return rc; - } - - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); - return ret; - -} diff --git a/orte/mca/gpr/replica/Makefile.am b/orte/mca/gpr/replica/Makefile.am deleted file mode 100644 index 31737cf151..0000000000 --- a/orte/mca/gpr/replica/Makefile.am +++ /dev/null @@ -1,55 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -SUBDIRS = api_layer functional_layer transition_layer communications - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if OMPI_BUILD_gpr_replica_DSO -component_noinst = -component_install = mca_gpr_replica.la -else -component_noinst = libmca_gpr_replica.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_gpr_replica_la_SOURCES = gpr_replica.h \ - gpr_replica_class_instances.h \ - gpr_replica_component.c - -mca_gpr_replica_la_LIBADD = \ - api_layer/libmca_gpr_replica_api.la \ - transition_layer/libmca_gpr_replica_tl.la \ - functional_layer/libmca_gpr_replica_fn.la \ - communications/libmca_gpr_replica_comm.la -mca_gpr_replica_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_gpr_replica_la_SOURCES = gpr_replica.h \ - gpr_replica_class_instances.h \ - gpr_replica_component.c -libmca_gpr_replica_la_LIBADD = \ - api_layer/libmca_gpr_replica_api.la \ - transition_layer/libmca_gpr_replica_tl.la \ - functional_layer/libmca_gpr_replica_fn.la \ - communications/libmca_gpr_replica_comm.la -libmca_gpr_replica_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/gpr/replica/api_layer/Makefile.am b/orte/mca/gpr/replica/api_layer/Makefile.am deleted file mode 100644 index 122f86424e..0000000000 --- a/orte/mca/gpr/replica/api_layer/Makefile.am +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -noinst_LTLIBRARIES = libmca_gpr_replica_api.la -libmca_gpr_replica_api_la_SOURCES = \ - gpr_replica_api.h \ - gpr_replica_segment_ops_api.c \ - gpr_replica_cleanup_api.c \ - gpr_replica_compound_cmd_api.c \ - gpr_replica_del_index_api.c \ - gpr_replica_deliver_notify_msg_api.c \ - gpr_replica_dump_api.c \ - gpr_replica_dump_local_trigs_subs_api.c \ - gpr_replica_arithmetic_ops_api.c \ - gpr_replica_put_get_api.c \ - gpr_replica_subscribe_api.c diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_api.h b/orte/mca/gpr/replica/api_layer/gpr_replica_api.h deleted file mode 100644 index d152a76a5e..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_api.h +++ /dev/null @@ -1,186 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -/** @file - */ - -#ifndef ORTE_GPR_REPLICA_API_H -#define ORTE_GPR_REPLICA_API_H - - -#include "orte_config.h" - -#include - -#include "orte/class/orte_pointer_array.h" - -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" - -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" - -#include "orte/mca/gpr/replica/gpr_replica.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - -/* - * Implemented registry functions - see gpr.h for documentation - */ - -/* - * Compound cmd functions - */ -int orte_gpr_replica_begin_compound_cmd(orte_buffer_t *buffer); - -int orte_gpr_replica_stop_compound_cmd(void); - -int orte_gpr_replica_exec_compound_cmd(orte_buffer_t *buffer); - -int orte_gpr_replica_process_compound_cmd(orte_buffer_t *buffer, - orte_process_name_t *name); - -/* - * Arithmetic operations - */ -int orte_gpr_replica_arith(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_dss_arith_op_t operation, - orte_data_value_t *operand); - -int orte_gpr_replica_increment_value(orte_gpr_value_t *value); - -int orte_gpr_replica_decrement_value(orte_gpr_value_t *value); - -/* - * Delete-index functions - */ -int orte_gpr_replica_delete_segment(char *segment); - -int orte_gpr_replica_delete_segment_nb(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - -int orte_gpr_replica_delete_entries(orte_gpr_addr_mode_t mode, - char *segment, char **tokens, char **keys); - -int orte_gpr_replica_delete_entries_nb( - orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - -int orte_gpr_replica_index(char *segment, orte_std_cntr_t *cnt, char ***index); - -int orte_gpr_replica_index_nb(char *segment, - orte_gpr_notify_cb_fn_t 
cbfunc, void *user_tag); - - -/* - * Cleanup functions - */ -int orte_gpr_replica_cleanup_job(orte_jobid_t jobid); - -int orte_gpr_replica_cleanup_proc(orte_process_name_t *proc); - - -/* - * Put-get functions - */ -int orte_gpr_replica_put(orte_std_cntr_t cnt, orte_gpr_value_t **values); - -int orte_gpr_replica_put_nb(orte_std_cntr_t cnt, orte_gpr_value_t **values, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - -int orte_gpr_replica_get(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values); - -int orte_gpr_replica_get_conditional(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t num_conditions, orte_gpr_keyval_t **conditions, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values); - -int orte_gpr_replica_get_nb(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - - -/* - * Subscribe functions - */ -int orte_gpr_replica_subscribe(orte_std_cntr_t num_subs, - orte_gpr_subscription_t **subscriptions, - orte_std_cntr_t num_trigs, - orte_gpr_trigger_t **trigs); - -int orte_gpr_replica_unsubscribe(orte_gpr_subscription_id_t sub_number); - -int orte_gpr_replica_cancel_trigger(orte_gpr_trigger_id_t trig); - -/* - * Diagnostic functions - */ -int orte_gpr_replica_dump_all(void); - -int orte_gpr_replica_dump_segments(char *segment); - -int orte_gpr_replica_dump_triggers(orte_gpr_trigger_id_t start); - -int orte_gpr_replica_dump_subscriptions(orte_gpr_subscription_id_t start); - -int orte_gpr_replica_dump_a_trigger( - char *name, - orte_gpr_trigger_id_t id); - -int orte_gpr_replica_dump_a_subscription( - char *name, - orte_gpr_subscription_id_t id); - -int orte_gpr_replica_dump_local_triggers(void); - -int orte_gpr_replica_dump_local_subscriptions(void); - -int orte_gpr_replica_dump_callbacks(void); - -int orte_gpr_replica_dump_notify_msg(orte_gpr_notify_message_t *msg); - -int 
orte_gpr_replica_dump_notify_data(orte_gpr_notify_data_t *data); - -int orte_gpr_replica_dump_value(orte_gpr_value_t *value); - -int orte_gpr_replica_dump_segment_size(char *segment); - -/* - * General functions - */ -int orte_gpr_replica_preallocate_segment(char *name, orte_std_cntr_t num_slots); - -int orte_gpr_replica_get_number_entries(orte_std_cntr_t *n, char *segment, char **tokens); - -int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_arithmetic_ops_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_arithmetic_ops_api.c deleted file mode 100644 index 7effdc26b9..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_arithmetic_ops_api.c +++ /dev/null @@ -1,211 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "gpr_replica_api.h" - - -int orte_gpr_replica_arith(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_dss_arith_op_t operation, - orte_data_value_t *operand) -{ - int rc; - orte_std_cntr_t num_tokens=0, num_keys=0; - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *itags=NULL, *keytags=NULL; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /** find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, segment))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - - /** convert tokens to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&itags, seg, - tokens, &num_tokens))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - - /** convert keys to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&keytags, seg, - keys, &num_keys))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_arith_op_fn(addr_mode, seg, itags, num_tokens, - num_keys, keytags, operation, operand))) { - ORTE_ERROR_LOG(rc); - } - -CLEANUP: - /** release lists of itags */ - if (NULL != itags) { - free(itags); - } - if (NULL != keytags) { - free(keytags); - } - - if (ORTE_SUCCESS == rc) { - if (ORTE_SUCCESS != - (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - rc = orte_gpr_replica_process_callbacks(); - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - -int orte_gpr_replica_increment_value(orte_gpr_value_t *value) -{ - int rc; - 
orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *itags=NULL; - - OPAL_TRACE(1); - - /* protect ourselves against errors */ - if (NULL == value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, value->segment))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&itags, seg, - value->tokens, &(value->num_tokens)))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_increment_value_fn(value->addr_mode, seg, - itags, value->num_tokens, value->cnt, value->keyvals))) { - ORTE_ERROR_LOG(rc); - } - - /* release list of itags */ - if (NULL != itags) { - free(itags); - } - - if (ORTE_SUCCESS == rc) { - if (ORTE_SUCCESS != - (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - rc = orte_gpr_replica_process_callbacks(); - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - -int orte_gpr_replica_decrement_value(orte_gpr_value_t *value) -{ - int rc; - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *itags=NULL; - - OPAL_TRACE(1); - - /* protect ourselves against errors */ - if (NULL == value) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, value->segment))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&itags, seg, 
- value->tokens, &(value->num_tokens)))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_decrement_value_fn(value->addr_mode, seg, - itags, value->num_tokens, value->cnt, value->keyvals))) { - ORTE_ERROR_LOG(rc); - } - - /* release list of itags */ - if (NULL != itags) { - free(itags); - } - - if (ORTE_SUCCESS == rc) { - if (ORTE_SUCCESS != - (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - rc = orte_gpr_replica_process_callbacks(); - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; -} diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_cleanup_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_cleanup_api.c deleted file mode 100644 index 11b45b8d05..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_cleanup_api.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/mca/ns/ns.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "gpr_replica_api.h" -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - - -int orte_gpr_replica_cleanup_job(orte_jobid_t jobid) -{ - int rc; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - rc = orte_gpr_replica_cleanup_job_fn(jobid); - - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - rc = orte_gpr_replica_process_callbacks(); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - - -int orte_gpr_replica_cleanup_proc(orte_process_name_t *proc) -{ - int rc; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - rc = orte_gpr_replica_cleanup_proc_fn(proc); - - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - rc = orte_gpr_replica_process_callbacks(); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_compound_cmd_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_compound_cmd_api.c deleted file mode 100644 index ed6c883f7f..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_compound_cmd_api.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/dss/dss_types.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -#include "gpr_replica_api.h" - -/* COMPOUND COMMANDS ARE NOT USED ON THE REPLICA - * Any process co-located with the replica will "drive" the registry - * directly - */ -int orte_gpr_replica_begin_compound_cmd(orte_buffer_t *buffer) -{ - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_stop_compound_cmd(void) -{ - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_exec_compound_cmd(orte_buffer_t *buffer) -{ - return ORTE_SUCCESS; -} - -int orte_gpr_replica_process_compound_cmd(orte_buffer_t *buffer, - orte_process_name_t *name) -{ - orte_buffer_t *answer=NULL; - int rc; - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_process_command_buffer(buffer, name, &answer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != answer) OBJ_RELEASE(answer); /* don't need this */ - - return rc; -} diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_del_index_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_del_index_api.c deleted file mode 100644 index 50bba565de..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_del_index_api.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "gpr_replica_api.h" - -int orte_gpr_replica_delete_segment(char *segment) -{ - orte_gpr_replica_segment_t *seg=NULL; - int rc; - - OPAL_TRACE(1); - - /* protect against errors */ - if (NULL == segment) { - return ORTE_ERROR; - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* locate the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, false, segment))) { - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - rc = orte_gpr_replica_release_segment(&seg); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; -} - - -int orte_gpr_replica_delete_segment_nb(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - - - -int orte_gpr_replica_delete_entries(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys) -{ - int rc; - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *token_itags=NULL, *key_itags=NULL; - orte_std_cntr_t num_tokens = 0, num_keys = 0; - - OPAL_TRACE(1); - - /* protect against errors */ - if (NULL == segment) { - return ORTE_ERROR; - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* locate the segment */ - if 
(ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, false, segment))) { - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&token_itags, seg, tokens, &num_tokens))) { - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&key_itags, seg, keys, &num_keys))) { - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - rc = orte_gpr_replica_delete_entries_fn(addr_mode, seg, - token_itags, num_tokens, - key_itags, num_keys); - - if (ORTE_SUCCESS == rc) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - } - } - - if (NULL != token_itags) { - free(token_itags); - } - - if (NULL != key_itags) { - free(key_itags); - } - - if (ORTE_SUCCESS == rc) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_process_callbacks())) { - ORTE_ERROR_LOG(rc); - } - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - - -int orte_gpr_replica_delete_entries_nb( - orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - - -int orte_gpr_replica_index(char *segment, orte_std_cntr_t *cnt, char ***index) -{ - orte_gpr_replica_segment_t *seg=NULL; - int rc; - - OPAL_TRACE(1); - - if (NULL == index || NULL == cnt) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - if (NULL == segment) { /* want global level index */ - seg = NULL; - } else { - /* locate the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, false, segment))) { - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - } - - rc = orte_gpr_replica_index_fn(seg, cnt, index); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; -} - -int 
orte_gpr_replica_index_nb(char *segment, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_deliver_notify_msg_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_deliver_notify_msg_api.c deleted file mode 100755 index 61d8bfff75..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_deliver_notify_msg_api.c +++ /dev/null @@ -1,152 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/api_layer/gpr_replica_api.h" - -int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg) -{ - orte_gpr_notify_data_t **data; - orte_gpr_replica_local_trigger_t **local_trigs; - orte_gpr_replica_local_subscriber_t **local_subs, *sub; - orte_gpr_trigger_cb_fn_t trig_cb; - orte_gpr_notify_cb_fn_t sub_cb; - void *sub_usertag; - orte_std_cntr_t i, j, k, n; - int rc; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* we first have to check if the message is a trigger message - if so, - * then the message is intended to be - * sent as a single block to that trigger's callback 
function. - */ - if (ORTE_GPR_TRIGGER_MSG == msg->msg_type) { - /* use the local trigger callback */ - local_trigs = (orte_gpr_replica_local_trigger_t**) - (orte_gpr_replica_globals.local_triggers)->addr; - for (i=0, j=0; j < orte_gpr_replica_globals.num_local_trigs && - i < (orte_gpr_replica_globals.local_triggers)->size; i++) { - if (NULL != local_trigs[i]) { - j++; - if (msg->id == local_trigs[i]->id) { - trig_cb = local_trigs[i]->callback; - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - trig_cb(msg); /* JJH This is a potential thread problem. Needs a deeper look */ - return ORTE_SUCCESS; - } - } - } - /* trigger could not be found */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return ORTE_ERR_NOT_FOUND; - } - - /* get here if this wasn't a trigger message. Only other allowed message type - * is a subscription message - if that isn't the case, then we have corrupt - * data, so flag it and return - */ - if (ORTE_GPR_SUBSCRIPTION_MSG != msg->msg_type) { - ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return ORTE_ERR_GPR_DATA_CORRUPT; - } - - /* get here if we have a subscription message - i.e., the message should - * be broken into its component parts and delivered separately - * to the indicated subscribers - */ - data = (orte_gpr_notify_data_t**)(msg->data)->addr; - for (i=0, n=0; n < msg->cnt && - i < (msg->data)->size; i++) { - if (NULL != data[i]) { - n++; - if (ORTE_GPR_SUBSCRIPTION_ID_MAX != data[i]->id || NULL != data[i]->target) { - /* for each datagram in the message, we need to lookup - * the associated subscription (could be specified by name or id) to find the correct - * callback function. Name specifications are given precedence over id. 
- */ - local_subs = (orte_gpr_replica_local_subscriber_t**) - (orte_gpr_replica_globals.local_subscriptions)->addr; - sub = NULL; - for (j=0, k=0; k < orte_gpr_replica_globals.num_local_subs && - j < (orte_gpr_replica_globals.local_subscriptions)->size; j++) { - if (NULL != local_subs[j]) { - k++; - if (NULL != data[i]->target) { - /* if target name provided, must use it */ - if (NULL != local_subs[j]->name && - 0 == strcmp(data[i]->target, local_subs[j]->name)) { - sub = local_subs[j]; - break; - } - } else if (data[i]->id == local_subs[j]->id) { - /* otherwise, see if id's match */ - sub = local_subs[j]; - break; - } - } - } - - /* get here and not found => abort */ - if (NULL == sub ) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return ORTE_ERR_NOT_FOUND; - } - - sub_cb = sub->callback; - sub_usertag = sub->user_tag; - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - sub_cb(data[i], sub_usertag); /* JJH This is a potential thread problem. Needs a deeper look */ - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - if (data[i]->remove) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_local_subscription(sub))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - } - } - } - } - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - /* the calling program will release the message object */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_dump_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_dump_api.c deleted file mode 100644 index af2c71cfe5..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_dump_api.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/output.h" - -#include "orte/dss/dss_types.h" -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/api_layer/gpr_replica_api.h" - -int orte_gpr_replica_dump_all(void) -{ - orte_buffer_t *buffer; - int rc; - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr_replica_dump_all: entered", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - buffer = OBJ_NEW(orte_buffer_t); - if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_all_fn(buffer))) { - ORTE_ERROR_LOG(rc); - } - - if (ORTE_SUCCESS == rc) { - orte_gpr_base_print_dump(buffer); - } - OBJ_RELEASE(buffer); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - -int orte_gpr_replica_dump_segments(char *segment) -{ - orte_buffer_t *buffer; - int rc; - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr_replica_dump_segments: entered", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - buffer = OBJ_NEW(orte_buffer_t); - if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_segments_fn(buffer, segment))) { - ORTE_ERROR_LOG(rc); - } - - if (ORTE_SUCCESS == rc) { - 
orte_gpr_base_print_dump(buffer); - } - OBJ_RELEASE(buffer); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - -int orte_gpr_replica_dump_triggers(orte_gpr_trigger_id_t start) -{ - orte_buffer_t *buffer; - int rc; - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr_replica_dump_triggers: entered", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - buffer = OBJ_NEW(orte_buffer_t); - if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer, start))) { - ORTE_ERROR_LOG(rc); - } - - if (ORTE_SUCCESS == rc) { - orte_gpr_base_print_dump(buffer); - } - OBJ_RELEASE(buffer); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - -int orte_gpr_replica_dump_subscriptions(orte_gpr_subscription_id_t start) -{ - orte_buffer_t *buffer; - int rc; - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - buffer = OBJ_NEW(orte_buffer_t); - if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer, start))) { - ORTE_ERROR_LOG(rc); - } - - if (ORTE_SUCCESS == rc) { - orte_gpr_base_print_dump(buffer); - } - OBJ_RELEASE(buffer); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - -int orte_gpr_replica_dump_a_trigger( - char *name, - orte_gpr_trigger_id_t id) -{ - orte_buffer_t buffer; - orte_gpr_replica_trigger_t **trigs; - orte_std_cntr_t i, j; - int rc; - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - - if (NULL == name) { /* dump the trigger corresponding to the provided id */ - trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - 
if (NULL != trigs[i]) { - j++; - if (id == trigs[i]->index) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(&buffer, trigs[i]))) { - ORTE_ERROR_LOG(rc); - } - goto PROCESS; - } - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_DESTRUCT(&buffer); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return ORTE_ERR_NOT_FOUND; - } else { /* dump the named trigger */ - trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trigs[i]) { - j++; - if (0 == strcmp(name, trigs[i]->name)) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(&buffer, trigs[i]))) { - ORTE_ERROR_LOG(rc); - } - goto PROCESS; - } - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_DESTRUCT(&buffer); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return ORTE_ERR_NOT_FOUND; - } - -PROCESS: - if (ORTE_SUCCESS == rc) { - orte_gpr_base_print_dump(&buffer); - } - OBJ_DESTRUCT(&buffer); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - - -int orte_gpr_replica_dump_a_subscription(char *name, - orte_gpr_subscription_id_t id) -{ - orte_buffer_t buffer; - orte_gpr_replica_subscription_t **subs; - orte_std_cntr_t i, j; - int rc; - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - - if (NULL == name) { /* dump the subscription corresponding to the provided id */ - subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_subs && - i < (orte_gpr_replica.subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - if (id == subs[i]->index) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(&buffer, subs[i]))) { - ORTE_ERROR_LOG(rc); - } - goto PROCESS; - } - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_DESTRUCT(&buffer); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return 
ORTE_ERR_NOT_FOUND; - } else { /* dump the named subscription */ - subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_subs && - i < (orte_gpr_replica.subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - if (0 == strcmp(name, subs[i]->name)) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(&buffer, subs[i]))) { - ORTE_ERROR_LOG(rc); - } - goto PROCESS; - } - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_DESTRUCT(&buffer); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return ORTE_ERR_NOT_FOUND; - } - -PROCESS: - if (ORTE_SUCCESS == rc) { - orte_gpr_base_print_dump(&buffer); - } - OBJ_DESTRUCT(&buffer); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - - -int orte_gpr_replica_dump_callbacks(void) -{ - orte_buffer_t *buffer; - int rc; - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr_replica_dump_callbacks: entered", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - buffer = OBJ_NEW(orte_buffer_t); - if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_callbacks_fn(buffer))) { - ORTE_ERROR_LOG(rc); - } - - if (ORTE_SUCCESS == rc) { - orte_gpr_base_print_dump(buffer); - } - OBJ_RELEASE(buffer); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - -int orte_gpr_replica_dump_notify_msg(orte_gpr_notify_message_t *msg) -{ - orte_buffer_t *answer; - int rc; - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_dump_notify_msg(answer, msg))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - 
ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - - -int orte_gpr_replica_dump_notify_data(orte_gpr_notify_data_t *data) -{ - orte_buffer_t *answer; - int rc; - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_dump_notify_data(answer, data))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - -int orte_gpr_replica_dump_value(orte_gpr_value_t *value) -{ - orte_buffer_t *answer; - int rc; - - answer = OBJ_NEW(orte_buffer_t); - if (NULL == answer) { /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_dump_value(answer, value))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer))) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(answer); - return rc; -} - -int orte_gpr_replica_dump_segment_size(char *segment) -{ - orte_buffer_t *buffer; - int rc; - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - buffer = OBJ_NEW(orte_buffer_t); - if (NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_segment_size_fn(buffer, segment))) { - ORTE_ERROR_LOG(rc); - } - - if (ORTE_SUCCESS == rc) { - orte_gpr_base_print_dump(buffer); - } - OBJ_RELEASE(buffer); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_dump_local_trigs_subs_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_dump_local_trigs_subs_api.c deleted file mode 100644 index 3b941787da..0000000000 --- 
a/orte/mca/gpr/replica/api_layer/gpr_replica_dump_local_trigs_subs_api.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/output.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/api_layer/gpr_replica_api.h" - -int orte_gpr_replica_dump_local_triggers(void) -{ - orte_gpr_replica_local_trigger_t **trigs; - orte_std_cntr_t j, k; - - opal_output(orte_gpr_base_output, "DUMP OF LOCAL TRIGGERS for %s\n", - ORTE_NAME_PRINT(orte_process_info.my_name)); - opal_output(orte_gpr_base_output, "Number of triggers: %lu\n", (unsigned long) orte_gpr_replica_globals.num_local_trigs); - - trigs = (orte_gpr_replica_local_trigger_t**)(orte_gpr_replica_globals.local_triggers)->addr; - for (j=0, k=0; k < orte_gpr_replica_globals.num_local_trigs && - j < (orte_gpr_replica_globals.local_triggers)->size; j++) { - if (NULL != trigs[j]) { - k++; - opal_output(orte_gpr_base_output, "Data for trigger %lu", (unsigned long) trigs[j]->id); - if (NULL == trigs[j]->name) { - opal_output(orte_gpr_base_output, "\tNOT a named trigger"); - } else { - opal_output(orte_gpr_base_output, "\ttrigger name: %s", trigs[j]->name); - } - if (NULL == trigs[j]->callback) { - 
opal_output(orte_gpr_base_output, "\tNULL callback"); - } else { - opal_output(orte_gpr_base_output, "\tCallback %llx", (unsigned long long)(intptr_t)trigs[j]->callback); - } - } - } - return ORTE_SUCCESS; -} - -int orte_gpr_replica_dump_local_subscriptions(void) -{ - orte_gpr_replica_local_subscriber_t **subs; - orte_std_cntr_t j, k; - - opal_output(orte_gpr_base_output, "DUMP OF LOCAL SUBSCRIPTIONS for %s\n", - ORTE_NAME_PRINT(orte_process_info.my_name)); - opal_output(orte_gpr_base_output, "Number of subscriptions: %lu\n", (unsigned long) orte_gpr_replica_globals.num_local_subs); - - subs = (orte_gpr_replica_local_subscriber_t**)(orte_gpr_replica_globals.local_subscriptions)->addr; - for (j=0, k=0; k < orte_gpr_replica_globals.num_local_subs && - j < (orte_gpr_replica_globals.local_subscriptions)->size; j++) { - if (NULL != subs[j]) { - k++; - opal_output(orte_gpr_base_output, "Data for subscription %lu", (unsigned long) subs[j]->id); - if (NULL == subs[j]->name) { - opal_output(orte_gpr_base_output, "\tNOT a named subscription"); - } else { - opal_output(orte_gpr_base_output, "\tsubscription name: %s", subs[j]->name); - } - if (NULL == subs[j]->callback) { - opal_output(orte_gpr_base_output, "\tNULL callback"); - } else { - opal_output(orte_gpr_base_output, "\tCallback %llx", (unsigned long long)(intptr_t)subs[j]->callback); - } - } - } - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_put_get_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_put_get_api.c deleted file mode 100644 index 863b50b124..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_put_get_api.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "gpr_replica_api.h" - -int orte_gpr_replica_put(orte_std_cntr_t cnt, orte_gpr_value_t **values) -{ - int rc = ORTE_SUCCESS; - orte_std_cntr_t i, j; - orte_gpr_value_t *val; - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *itags=NULL; - - OPAL_TRACE(1); - - /* protect ourselves against errors */ - if (NULL == values) { - return ORTE_ERROR; - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - for (i=0; i < cnt; i++) { - val = values[i]; - - /* first check for error - all keyvals must have a non-NULL string key */ - for (j=0; j < val->cnt; j++) { - if (NULL == (val->keyvals[j])->key) { - rc = ORTE_ERR_BAD_PARAM; - goto CLEANUP; - } - } - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, val->segment))) { - goto CLEANUP; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&itags, seg, - val->tokens, &(val->num_tokens)))) { - goto CLEANUP; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_put_fn(val->addr_mode, seg, itags, val->num_tokens, - val->cnt, val->keyvals))) { - goto CLEANUP; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_check_events())) { - goto CLEANUP; - } - - if (NULL != itags) { - free(itags); - itags = NULL; - } - } - - CLEANUP: /* or not ... 
*/ - /* release list of itags */ - if (NULL != itags) { - free(itags); - } - - if (ORTE_SUCCESS == rc) { - rc = orte_gpr_replica_process_callbacks(); - } else { - ORTE_ERROR_LOG(rc); - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - - -int orte_gpr_replica_put_nb(orte_std_cntr_t cnt, orte_gpr_value_t **values, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - - -int orte_gpr_replica_get(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values) -{ - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *tokentags=NULL, *keytags=NULL; - orte_std_cntr_t num_tokens=0, num_keys=0; - int rc; - - OPAL_TRACE(1); - - *cnt = 0; - *values = NULL; - - /* protect against errors */ - if (NULL == segment) { - return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, segment))) { - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&tokentags, seg, - tokens, &num_tokens))) { - goto CLEANUP; - } - - /* convert keys to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&keytags, seg, - keys, &num_keys))) { - goto CLEANUP; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_fn(addr_mode, seg, - tokentags, num_tokens, - keytags, num_keys, - cnt, values))) { - goto CLEANUP; - } - -CLEANUP: - if (NULL != tokentags) { - free(tokentags); - } - - if (NULL != keytags) { - free(keytags); - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - -} - - -int orte_gpr_replica_get_conditional(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_std_cntr_t num_conditions, orte_gpr_keyval_t **conditions, - 
orte_std_cntr_t *cnt, orte_gpr_value_t ***values) -{ - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *tokentags=NULL, *keytags=NULL; - orte_gpr_replica_itagval_t **conds=NULL; - orte_std_cntr_t num_tokens=0, num_keys=0, i; - int rc; - - OPAL_TRACE(1); - - *cnt = 0; - *values = NULL; - - /* protect against errors */ - if (NULL == segment) { - return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, segment))) { - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&tokentags, seg, - tokens, &num_tokens))) { - goto CLEANUP; - } - - /* convert keys to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&keytags, seg, - keys, &num_keys))) { - goto CLEANUP; - } - - /* convert conditions to itagvals */ - conds = (orte_gpr_replica_itagval_t**)malloc(num_conditions*sizeof(orte_gpr_replica_itagval_t*)); - memset(conds, 0, num_conditions*sizeof(orte_gpr_replica_itagval_t*)); /* init the space */ - - if (NULL == conds) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - for (i=0; i < num_conditions; i++) { - conds[i] = OBJ_NEW(orte_gpr_replica_itagval_t); - if (NULL == conds[i]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_itag(&(conds[i]->itag), seg, conditions[i]->key))) { - goto CLEANUP; - } - conds[i]->value = OBJ_NEW(orte_data_value_t); - if (NULL == conds[i]->value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - conds[i]->value->type = conditions[i]->value->type; - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((conds[i]->value)->data), conditions[i]->value->data, conds[i]->value->type))) { - 
ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_conditional_fn(addr_mode, seg, - tokentags, num_tokens, keytags, num_keys, - num_conditions, conds, - cnt, values))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - -CLEANUP: - if (NULL != tokentags) { - free(tokentags); - } - - if (NULL != keytags) { - free(keytags); - } - - for (i=0; i < num_conditions; i++) { - if (NULL != conds[i]) OBJ_RELEASE(conds[i]); - } - if (NULL != conds) free(conds); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - -} - - -int orte_gpr_replica_get_nb(orte_gpr_addr_mode_t addr_mode, - char *segment, char **tokens, char **keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(1); - - return ORTE_ERR_NOT_IMPLEMENTED; -} diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_segment_ops_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_segment_ops_api.c deleted file mode 100644 index c9e0c27b18..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_segment_ops_api.c +++ /dev/null @@ -1,119 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "opal/util/trace.h" - -#include "orte/class/orte_pointer_array.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "gpr_replica_api.h" - -int orte_gpr_replica_preallocate_segment(char *name, orte_std_cntr_t num_slots) -{ - int rc; - orte_gpr_replica_segment_t *seg=NULL; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, name))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - rc = orte_pointer_array_set_size(seg->containers, num_slots); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - -int orte_gpr_replica_get_number_entries(orte_std_cntr_t *n, char *segment, char **tokens) -{ - int rc; - orte_gpr_replica_segment_t *seg=NULL; - orte_std_cntr_t num_tokens=0; - orte_gpr_replica_itag_t *itags=NULL; - orte_gpr_replica_container_t **cptr; - orte_std_cntr_t j, k, num_keyvals; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, segment))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - /* if no tokens provided, just return the number of containers */ - if (NULL == tokens) { - *n = seg->num_containers; - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return ORTE_SUCCESS; - } - - /* otherwise, look up the container - convert tokens to array of itags */ - num_keyvals = 0; - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&itags, seg, - tokens, &num_tokens))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - - /* find the specified container(s) */ - 
if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_containers(seg, ORTE_GPR_REPLICA_AND, - itags, num_tokens))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* loop through any found containers and add up their keyvals */ - cptr = (orte_gpr_replica_container_t**)(orte_gpr_replica_globals.srch_cptr)->addr; - for (j=0, k=0; k < orte_gpr_replica_globals.num_srch_cptr && - j < (orte_gpr_replica_globals.srch_cptr)->size; j++) { - if (NULL != cptr[j]) { - k++; - num_keyvals += cptr[j]->num_itagvals; - } - } - -CLEANUP: - if (NULL != itags) free(itags); - - *n = num_keyvals; - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_subscribe_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_subscribe_api.c deleted file mode 100644 index fe219d20e8..0000000000 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_subscribe_api.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/dss/dss_types.h" - -#include "orte/mca/ns/ns.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "gpr_replica_api.h" - -int -orte_gpr_replica_subscribe(orte_std_cntr_t num_subs, - orte_gpr_subscription_t **subscriptions, - orte_std_cntr_t num_trigs, - orte_gpr_trigger_t **trigs) -{ - int rc; - - OPAL_TRACE(1); - - /* protect against errors */ - if (NULL == subscriptions && NULL == trigs) { /* need at least one */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - /* store callback function and user_tag in local list for lookup - * generate id_tag to put in registry to identify lookup entry - * for each subscription - the subscription id is returned - * inside the subscription objects - */ - if (NULL != subscriptions) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_enter_local_subscription( - num_subs, subscriptions))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - } - - /* if any triggers were provided, get id tags for them - the - * idtags are returned inside the trigger objects - */ - if (NULL != trigs) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_enter_local_trigger( - num_trigs, trigs))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - } - - /* register subscriptions */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_subscribe_fn(NULL, - num_subs, subscriptions, - num_trigs, trigs))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - rc = orte_gpr_replica_process_callbacks(); - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - - -int 
orte_gpr_replica_unsubscribe(orte_gpr_subscription_id_t sub_number) -{ - orte_gpr_replica_local_subscriber_t **subs; - orte_std_cntr_t i, j; - int rc; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_subscription(NULL, sub_number))) { - ORTE_ERROR_LOG(rc); - } - - if (ORTE_SUCCESS == rc) { - /* find and remove it from the local subscription tracking system */ - subs = (orte_gpr_replica_local_subscriber_t**)(orte_gpr_replica_globals.local_subscriptions)->addr; - for (i=0, j=0; j < orte_gpr_replica_globals.num_local_subs && - i < (orte_gpr_replica_globals.local_subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - if (sub_number == subs[i]->id) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_local_subscription(subs[i]))) { - ORTE_ERROR_LOG(rc); - } - } - } - } - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} - - -int orte_gpr_replica_cancel_trigger(orte_gpr_trigger_id_t trig) -{ - orte_gpr_replica_local_trigger_t **trigs; - orte_std_cntr_t i, j; - int rc; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - rc = orte_gpr_replica_remove_trigger(NULL, trig); - - if (ORTE_SUCCESS == rc) { - /* find and remove it from the local trigger tracking system */ - trigs = (orte_gpr_replica_local_trigger_t**)(orte_gpr_replica_globals.local_triggers)->addr; - for (i=0, j=0; j < orte_gpr_replica_globals.num_local_trigs && - i < (orte_gpr_replica_globals.local_triggers)->size; i++) { - if (NULL != trigs[i]) { - j++; - if (trig == trigs[i]->id) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_local_trigger(trigs[i]))) { - ORTE_ERROR_LOG(rc); - } - } - } - } - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - return rc; -} diff --git a/orte/mca/gpr/replica/communications/Makefile.am b/orte/mca/gpr/replica/communications/Makefile.am deleted file mode 100644 index 3e01805887..0000000000 --- 
a/orte/mca/gpr/replica/communications/Makefile.am +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -noinst_LTLIBRARIES = libmca_gpr_replica_comm.la -libmca_gpr_replica_comm_la_SOURCES = \ - gpr_replica_comm.h \ - gpr_replica_recv_proxy_msgs.c \ - gpr_replica_remote_msg.c \ - gpr_replica_cmd_processor.c \ - gpr_replica_compound_cmd_cm.c \ - gpr_replica_cleanup_cm.c \ - gpr_replica_del_index_cm.c \ - gpr_replica_dump_cm.c \ - gpr_replica_arithmetic_ops_cm.c \ - gpr_replica_put_get_cm.c \ - gpr_replica_subscribe_cm.c diff --git a/orte/mca/gpr/replica/communications/gpr_replica_arithmetic_ops_cm.c b/orte/mca/gpr/replica/communications/gpr_replica_arithmetic_ops_cm.c deleted file mode 100644 index 09bd26b026..0000000000 --- a/orte/mca/gpr/replica/communications/gpr_replica_arithmetic_ops_cm.c +++ /dev/null @@ -1,310 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -int orte_gpr_replica_recv_arith_op_cmd(orte_buffer_t *input_buffer, orte_buffer_t *answer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_ARITH_CMD; - orte_std_cntr_t n, num_tokens, num_keys; - orte_gpr_addr_mode_t addr_mode; - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *tokentags=NULL, *keytags=NULL; - orte_dss_arith_op_t op_flag; - orte_data_value_t *operand; - int rc, ret; - char *segment=NULL, **tokens=NULL, **keys=NULL; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &addr_mode, &n, ORTE_GPR_ADDR_MODE))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &segment, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &num_tokens, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (0 < num_tokens) { /* tokens provided - get them */ - tokens = (char**)malloc(num_tokens*sizeof(char*)); - if (NULL == tokens) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - ret = ORTE_ERR_OUT_OF_RESOURCE; - goto RETURN_ERROR; - } - n = num_tokens; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, tokens, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - free(tokens); - goto RETURN_ERROR; - } - } else { /* no tokens provided */ - tokens = NULL; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &num_keys, &n, 
ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (0 < num_keys) { /* keys provided - get them */ - keys = (char**)malloc(num_keys*sizeof(char*)); - if (NULL == keys) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - ret = ORTE_ERR_OUT_OF_RESOURCE; - goto RETURN_ERROR; - } - n = num_keys; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, keys, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - } else { /* no keys provided */ - keys = NULL; - } - - /* find the segment */ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_find_seg(&seg, true, segment))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_get_itag_list(&tokentags, seg, - tokens, &num_tokens))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - /* convert keys to array of itags */ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_get_itag_list(&keytags, seg, - keys, &num_keys))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - /** unpack the operation flag */ - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &op_flag, &n, ORTE_ARITH_OP))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - /** unpack the operand */ - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &operand, &n, ORTE_DATA_VALUE))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_arith_op_fn(addr_mode, seg, tokentags, num_tokens, - num_keys, keytags, op_flag, operand))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - -RETURN_ERROR: - /** cleanup memory */ - OBJ_RELEASE(operand); - if (NULL != tokentags) free(tokentags); - if (NULL != keytags) free(keytags); - - /** pack the resulting status to return to caller */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &ret, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - } - - if (ORTE_SUCCESS == ret) { - if (ORTE_SUCCESS != (rc = 
orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - } - } - - return ret; -} - -int orte_gpr_replica_recv_increment_value_cmd(orte_buffer_t *cmd, orte_buffer_t *answer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_INCREMENT_VALUE_CMD; - orte_gpr_value_t *value; - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *itags=NULL; - orte_std_cntr_t n; - int rc, ret; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, &value, &n, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, value->segment))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&itags, seg, - value->tokens, &(value->num_tokens)))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_increment_value_fn(value->addr_mode, seg, - itags, value->num_tokens, value->cnt, value->keyvals))) { - ORTE_ERROR_LOG(ret); - } - - /* release list of itags */ - if (NULL != itags) { - free(itags); - } - - /* release value object */ - OBJ_RELEASE(value); - - if (ORTE_SUCCESS == ret) { - if (ORTE_SUCCESS != - (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - RETURN_ERROR: - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &ret, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ret; -} - -int orte_gpr_replica_recv_decrement_value_cmd(orte_buffer_t *cmd, orte_buffer_t *answer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_DECREMENT_VALUE_CMD; - orte_gpr_value_t *value; - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *itags=NULL; - orte_std_cntr_t n; - int rc, ret; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc 
= orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(cmd, &value, &n, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, value->segment))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&itags, seg, - value->tokens, &(value->num_tokens)))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_decrement_value_fn(value->addr_mode, seg, - itags, value->num_tokens, value->cnt, value->keyvals))) { - ORTE_ERROR_LOG(ret); - } - - /* release list of itags */ - if (NULL != itags) { - free(itags); - } - - /* release value object */ - OBJ_RELEASE(value); - - if (ORTE_SUCCESS == ret) { - if (ORTE_SUCCESS != - (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - RETURN_ERROR: - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &ret, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ret; -} diff --git a/orte/mca/gpr/replica/communications/gpr_replica_cleanup_cm.c b/orte/mca/gpr/replica/communications/gpr_replica_cleanup_cm.c deleted file mode 100644 index e6f077d403..0000000000 --- a/orte/mca/gpr/replica/communications/gpr_replica_cleanup_cm.c +++ /dev/null @@ -1,130 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -int orte_gpr_replica_recv_cleanup_job_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_CLEANUP_JOB_CMD; - orte_jobid_t jobid=0; - orte_std_cntr_t n; - int rc, ret; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &jobid, &n, ORTE_JOBID))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - ret = orte_gpr_replica_cleanup_job_fn(jobid); - - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - } - -RETURN_ERROR: - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &ret, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ret; -} - - -int orte_gpr_replica_recv_cleanup_proc_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_CLEANUP_PROC_CMD; - orte_process_name_t proc; - orte_std_cntr_t n; - int rc, ret; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &proc, &n, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - ret = orte_gpr_replica_cleanup_proc_fn(&proc); - - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - } - -RETURN_ERROR: - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &ret, 1, ORTE_INT))) { - 
ORTE_ERROR_LOG(rc); - return rc; - } - - return ret; -} - - -int orte_gpr_replica_ft_event(int state) { - - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/communications/gpr_replica_cmd_processor.c b/orte/mca/gpr/replica/communications/gpr_replica_cmd_processor.c deleted file mode 100644 index 7c241799e3..0000000000 --- a/orte/mca/gpr/replica/communications/gpr_replica_cmd_processor.c +++ /dev/null @@ -1,423 +0,0 @@ -/* -*- C -*- -* -* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -* University Research and Technology -* Corporation. All rights reserved. -* Copyright (c) 2004-2005 The University of Tennessee and The University -* of Tennessee Research Foundation. All rights -* reserved. -* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -* University of Stuttgart. All rights reserved. -* Copyright (c) 2004-2005 The Regents of the University of California. -* All rights reserved. 
-* $COPYRIGHT$ -* -* Additional copyrights may follow -* -* $HEADER$ -*/ -/** @file: -* -* The Open MPI General Purpose Registry - Replica component -* -*/ - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "opal/util/output.h" -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -/* -* handle message from proxies - */ - -int orte_gpr_replica_process_command_buffer(orte_buffer_t *input_buffer, - orte_process_name_t *sender, - orte_buffer_t **output_buffer) -{ - orte_buffer_t *answer; - orte_gpr_cmd_flag_t command; - int rc, ret, rc2; - orte_std_cntr_t n; - bool compound_cmd=false; - - OPAL_TRACE(3); - - *output_buffer = OBJ_NEW(orte_buffer_t); - if (NULL == *output_buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - answer = *output_buffer; /* for convenience */ - - n = 1; - rc = ORTE_SUCCESS; - ret = ORTE_SUCCESS; - - while (ORTE_SUCCESS == (rc = orte_dss.unpack(input_buffer, &command, &n, ORTE_GPR_CMD))) { - switch(command) { - - case ORTE_GPR_COMPOUND_CMD: /***** COMPOUND COMMAND ******/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tcompound cmd"); - } - - compound_cmd = true; - break; - - - case ORTE_GPR_DELETE_SEGMENT_CMD: /****** DELETE SEGMENT *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdelete segment cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_delete_segment_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - case ORTE_GPR_PUT_CMD: /***** PUT *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tput cmd"); - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_put_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - break; - - - case ORTE_GPR_GET_CMD: /***** GET *****/ - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tget cmd"); - } - - if 
(ORTE_SUCCESS != (ret = orte_gpr_replica_recv_get_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - case ORTE_GPR_GET_CONDITIONAL_CMD: /***** GET_CONDITIONAL *****/ - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tget conditional cmd"); - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_get_conditional_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - case ORTE_GPR_DELETE_ENTRIES_CMD: /***** DELETE ENTRIES *****/ - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdelete object cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_delete_entries_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - case ORTE_GPR_INDEX_CMD: /***** INDEX *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tindex cmd"); - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_index_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - case ORTE_GPR_SUBSCRIBE_CMD: /***** SUBSCRIBE *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tsubscribe cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_subscribe_cmd(sender, input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - case ORTE_GPR_UNSUBSCRIBE_CMD: /***** UNSUBSCRIBE *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tunsubscribe cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_unsubscribe_cmd(sender, input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_CANCEL_TRIGGER_CMD: /***** CANCEL_TRIGGER *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tcancel trigger cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_cancel_trigger_cmd(sender, input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case 
ORTE_GPR_DUMP_ALL_CMD: /***** DUMP *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdump all cmd"); - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_all_cmd(answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_DUMP_SEGMENTS_CMD: /***** DUMP *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdump segments cmd"); - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_segments_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_DUMP_TRIGGERS_CMD: /***** DUMP *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdump triggers cmd"); - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_triggers_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD: /***** DUMP *****/ - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdump subscriptions cmd"); - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_subscriptions_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_DUMP_A_TRIGGER_CMD: /***** DUMP *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdump a trigger cmd"); - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_a_trigger_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - case ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD: /***** DUMP *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdump a subscription cmd"); - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_a_subscription_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - case ORTE_GPR_DUMP_CALLBACKS_CMD: /***** DUMP *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdump callbacks cmd"); - } - - if (ORTE_SUCCESS != (ret = 
orte_gpr_replica_recv_dump_callbacks_cmd(answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_DUMP_SEGMENT_SIZE_CMD: /***** DUMP *****/ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_segment_size_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - case ORTE_GPR_ARITH_CMD: /***** ARITH *****/ - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tarith cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_arith_op_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_INCREMENT_VALUE_CMD: /***** INCREMENT_VALUE *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tincrement_value cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_increment_value_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_DECREMENT_VALUE_CMD: /***** DECREMENT_VALUE ******/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tdecrement_value cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_decrement_value_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_CLEANUP_JOB_CMD: /***** CLEANUP JOB *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tcleanup job cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_cleanup_job_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - case ORTE_GPR_CLEANUP_PROC_CMD: /***** CLEANUP PROCESS *****/ - if (orte_gpr_replica_globals.debug) { - opal_output(0, "\tcleanup proc cmd"); - } - - if (ORTE_SUCCESS != (ret = - orte_gpr_replica_recv_cleanup_proc_cmd(input_buffer, answer))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - break; - - - - default: /**** UNRECOGNIZED COMMAND ****/ - command = ORTE_GPR_ERROR; - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, 
(void*)&command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - } /* end switch command */ - - n = 1; /* unpack a single command */ - } /* end while */ - - if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - ORTE_ERROR_LOG(rc); - } - - /* deal with compound cmds to ensure proper return values */ - if (compound_cmd) { - OBJ_RELEASE(answer); - *output_buffer = OBJ_NEW(orte_buffer_t); - if (NULL == *output_buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_GPR_COMPOUND_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.pack(*output_buffer, (void*)&command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - ret = ORTE_SUCCESS; - if (ORTE_SUCCESS != (rc = orte_dss.pack(*output_buffer, &ret, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - } - - return ORTE_SUCCESS; - -RETURN_ERROR: - if (orte_gpr_replica_globals.debug) { - opal_output(0, "unrecognized command"); - } - OBJ_RELEASE(*output_buffer); - *output_buffer = answer = OBJ_NEW(orte_buffer_t); - if (NULL == *output_buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc2 = orte_dss.pack(answer, (void*)&command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc2); - } - if (ORTE_SUCCESS != ret) { - orte_dss.pack(answer, &ret, 1, ORTE_INT); - return rc; - } - orte_dss.pack(answer, &rc, 1, ORTE_INT); - return rc; -} - diff --git a/orte/mca/gpr/replica/communications/gpr_replica_comm.h b/orte/mca/gpr/replica/communications/gpr_replica_comm.h deleted file mode 100644 index 0fab38278f..0000000000 --- a/orte/mca/gpr/replica/communications/gpr_replica_comm.h +++ /dev/null @@ -1,156 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -/** @file - */ - -#ifndef ORTE_GPR_REPLICA_COMM_H -#define ORTE_GPR_REPLICA_COMM_H - - -#include "orte_config.h" - -#include - -#include "orte/class/orte_pointer_array.h" - -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/rml/rml_types.h" - -#include "orte/mca/gpr/replica/gpr_replica.h" -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" - -#include "orte/mca/gpr/replica/gpr_replica.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * GPR Replica Communications Interfaces - */ - -/* - * Proxy msg receiver - */ -void orte_gpr_replica_recv(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata); - -/* - * Remote notification transmitter - */ - -/* - * Local notification transmitter - */ - -/* - * Command buffer processor - */ -int orte_gpr_replica_process_command_buffer(orte_buffer_t *input_buffer, - orte_process_name_t *sender, - orte_buffer_t **output_buffer); - -/* - * Messaging functions - */ -int orte_gpr_replica_remote_notify(orte_process_name_t *recipient, - orte_gpr_notify_message_t *msg); - -/* - * define the local functions for processing commands - */ -int orte_gpr_replica_recv_compound_cmd(orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_delete_segment_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_put_cmd(orte_buffer_t 
*input_buffer, - orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_get_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *answer); - -int orte_gpr_replica_recv_get_conditional_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_delete_entries_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_index_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *answer); - -int orte_gpr_replica_recv_subscribe_cmd(orte_process_name_t* sender, - orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_unsubscribe_cmd(orte_process_name_t* sender, - orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_cancel_trigger_cmd(orte_process_name_t* sender, - orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_dump_all_cmd(orte_buffer_t *answer); - -int orte_gpr_replica_recv_dump_segments_cmd(orte_buffer_t *input_buffer, orte_buffer_t *answer); - -int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *answer); - -int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *answer); - -int orte_gpr_replica_recv_dump_a_trigger_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *answer); - -int orte_gpr_replica_recv_dump_a_subscription_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *answer); - -int orte_gpr_replica_recv_dump_callbacks_cmd(orte_buffer_t *answer); - -int orte_gpr_replica_recv_dump_segment_size_cmd(orte_buffer_t *input_buffer, orte_buffer_t *answer); - -int orte_gpr_replica_recv_get_startup_msg_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *answer); - -int orte_gpr_replica_recv_cleanup_job_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_cleanup_proc_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -int 
orte_gpr_replica_recv_arith_op_cmd(orte_buffer_t *cmd, orte_buffer_t *answer); - -int orte_gpr_replica_recv_increment_value_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -int orte_gpr_replica_recv_decrement_value_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/gpr/replica/communications/gpr_replica_compound_cmd_cm.c b/orte/mca/gpr/replica/communications/gpr_replica_compound_cmd_cm.c deleted file mode 100644 index 7fde727028..0000000000 --- a/orte/mca/gpr/replica/communications/gpr_replica_compound_cmd_cm.c +++ /dev/null @@ -1,48 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -int orte_gpr_replica_recv_compound_cmd(orte_buffer_t *output_buffer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_COMPOUND_CMD; - int rc; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - } - return rc; -} diff --git a/orte/mca/gpr/replica/communications/gpr_replica_del_index_cm.c b/orte/mca/gpr/replica/communications/gpr_replica_del_index_cm.c deleted file mode 100644 index b391621789..0000000000 --- a/orte/mca/gpr/replica/communications/gpr_replica_del_index_cm.c +++ /dev/null @@ -1,284 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -int orte_gpr_replica_recv_delete_segment_cmd(orte_buffer_t *buffer, orte_buffer_t *answer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_DELETE_SEGMENT_CMD; - char *segment=NULL; - orte_gpr_replica_segment_t *seg=NULL; - orte_std_cntr_t n; - int rc, ret; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &segment, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_find_seg(&seg, false, segment))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_release_segment(&seg))) { - ORTE_ERROR_LOG(ret); - } - - RETURN_ERROR: - if (NULL != segment) free(segment); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &ret, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ret; -} - -int orte_gpr_replica_recv_delete_entries_cmd(orte_buffer_t *buffer, orte_buffer_t *answer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_DELETE_ENTRIES_CMD; - orte_gpr_addr_mode_t addr_mode; - orte_gpr_replica_itag_t *token_itags=NULL, *key_itags=NULL; - orte_gpr_replica_segment_t *seg=NULL; - char *segment=NULL, **tokens=NULL, **keys=NULL; - orte_std_cntr_t num_tokens=0, num_keys=0, i, n; - int rc, ret; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &addr_mode, &n, ORTE_GPR_ADDR_MODE))) { - 
ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &segment, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &num_tokens, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (0 >= num_tokens) { /* no tokens provided - wildcard case */ - tokens = NULL; - } else { /* tokens provided */ - tokens = (char**)malloc(num_tokens*sizeof(char*)); - if (NULL == tokens) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - ret = ORTE_ERR_OUT_OF_RESOURCE; - goto RETURN_ERROR; - } - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, tokens, &num_tokens, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &num_keys, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (0 >= num_keys) { /* no keys provided - wildcard case */ - keys = NULL; - } else { /* keys provided */ - keys = (char**)malloc(num_keys*sizeof(char*)); - if (NULL == keys) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - ret = ORTE_ERR_OUT_OF_RESOURCE; - goto RETURN_ERROR; - } - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, keys, &num_keys, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - } - - /* locate the segment */ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_find_seg(&seg, false, segment))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_get_itag_list(&token_itags, seg, tokens, &num_tokens))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_get_itag_list(&key_itags, seg, keys, &num_keys))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - ret = orte_gpr_replica_delete_entries_fn(addr_mode, seg, - token_itags, num_tokens, - key_itags, num_keys); - - if (ORTE_SUCCESS == ret) { - orte_gpr_replica_check_events(); - } - - - 
RETURN_ERROR: - if (NULL != segment) { - free(segment); - } - - if (NULL != tokens) { - for (i=0; iaddr; - for (i=0, j=0; j < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trigs[i]) { - j++; - if (id == trigs[i]->index) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(answer, trigs[i]))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - - } else { /* dump the named trigger */ - trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trigs[i]) { - j++; - if (0 == strcmp(name, trigs[i]->name)) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(answer, trigs[i]))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - } - - return rc; -} - -int orte_gpr_replica_recv_dump_a_subscription_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *answer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD; - orte_gpr_replica_subscription_t **subs; - orte_gpr_subscription_id_t id; - orte_std_cntr_t n, i, j; - char *name; - int rc; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &name, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &id, &n, ORTE_GPR_SUBSCRIPTION_ID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (NULL == name) { /* dump the subscription corresponding to the provided id */ - subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_subs && - i < (orte_gpr_replica.subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - if (id == 
subs[i]->index) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(answer, subs[i]))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - - } else { /* dump the named subscription */ - subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_subs && - i < (orte_gpr_replica.subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - if (0 == strcmp(name, subs[i]->name)) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(answer, subs[i]))) { - ORTE_ERROR_LOG(rc); - } - free(name); - return rc; - } - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - } - - return rc; -} - -int orte_gpr_replica_recv_dump_callbacks_cmd(orte_buffer_t *answer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_CALLBACKS_CMD; - int rc; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_gpr_replica_dump_callbacks_fn(answer); - - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - } - return rc; -} - -int orte_gpr_replica_recv_dump_segment_size_cmd(orte_buffer_t *input_buffer, orte_buffer_t *answer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_SEGMENT_SIZE_CMD; - char *segment; - orte_std_cntr_t n; - int rc; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &segment, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - rc = orte_gpr_replica_dump_segment_size_fn(answer, segment); - - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - } - return rc; -} diff --git a/orte/mca/gpr/replica/communications/gpr_replica_put_get_cm.c b/orte/mca/gpr/replica/communications/gpr_replica_put_get_cm.c deleted file mode 100644 index c6025087ba..0000000000 --- 
a/orte/mca/gpr/replica/communications/gpr_replica_put_get_cm.c +++ /dev/null @@ -1,534 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -int orte_gpr_replica_recv_put_cmd(orte_buffer_t *buffer, orte_buffer_t *answer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_PUT_CMD; - orte_gpr_value_t **values = NULL, *val; - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *itags=NULL; - int rc, ret; - orte_std_cntr_t i=0, cnt, num_values; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - cnt = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &num_values, &cnt, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - values = (orte_gpr_value_t**)malloc(num_values * sizeof(orte_gpr_value_t*)); - if (NULL == values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - ret = ORTE_ERR_OUT_OF_RESOURCE; - goto RETURN_ERROR; - } - - cnt = num_values; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, values, &cnt, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - free(values); - ret = rc; - goto 
RETURN_ERROR; - } - - for (i=0; i < cnt; i++) { - val = values[i]; - - /* find the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, val->segment))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&itags, seg, - val->tokens, &(val->num_tokens)))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (ret = orte_gpr_replica_put_fn(val->addr_mode, seg, itags, - val->num_tokens, val->cnt, val->keyvals))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS == ret) { - if (ORTE_SUCCESS != - (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - if (NULL != itags) free(itags); - itags = NULL; - } - - RETURN_ERROR: - /* release list of itags */ - if (NULL != itags) { - free(itags); - } - - /* release values */ - if (NULL != values) { - for (i=0; i < cnt; i++) { - if (NULL != values[i]) { - OBJ_RELEASE(values[i]); - } - } - if (NULL != values) free(values); - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &ret, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ret; -} - -int orte_gpr_replica_recv_get_cmd(orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_GET_CMD; - orte_gpr_addr_mode_t addr_mode; - orte_gpr_replica_segment_t *seg=NULL; - orte_gpr_replica_itag_t *tokentags=NULL, *keytags=NULL; - int rc, ret; - char *segment=NULL, **tokens=NULL, **keys=NULL; - orte_std_cntr_t i=0, cnt=0; - orte_std_cntr_t num_tokens=0, num_keys=0, n; - orte_gpr_value_t **values=NULL; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &addr_mode, &n, ORTE_GPR_ADDR_MODE))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - n = 1; 
- if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &segment, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &num_tokens, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (0 < num_tokens) { /* tokens provided - get them */ - tokens = (char**)malloc(num_tokens*sizeof(char*)); - if (NULL == tokens) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - ret = ORTE_ERR_OUT_OF_RESOURCE; - goto RETURN_ERROR; - } - n = num_tokens; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, tokens, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - free(tokens); - goto RETURN_ERROR; - } - } else { /* no tokens provided */ - tokens = NULL; - } - - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, &num_keys, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (0 < num_keys) { /* keys provided - get them */ - keys = (char**)malloc(num_keys*sizeof(char*)); - if (NULL == keys) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - ret = ORTE_ERR_OUT_OF_RESOURCE; - goto RETURN_ERROR; - } - n = num_keys; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(input_buffer, keys, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - } else { /* no keys provided */ - keys = NULL; - } - - /* find the segment */ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_find_seg(&seg, true, segment))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - /* convert tokens to array of itags */ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_get_itag_list(&tokentags, seg, - tokens, &num_tokens))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - /* convert keys to array of itags */ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_get_itag_list(&keytags, seg, - keys, &num_keys))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - /* get the answer */ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_get_fn(addr_mode, seg, - tokentags, num_tokens, - 
keytags, num_keys, - &cnt, &values))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - RETURN_ERROR: - - /* pack the number of values */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &cnt, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - ret = rc; - } - - /* pack the answer into the output output_buffer */ - if (0 < cnt) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, values, cnt, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - ret = rc; - } - } - - if (NULL != segment) { - free(segment); - } - - if (NULL != tokens) { - for (i=0; iitag), seg, conditions[i]->key))) { - goto RETURN_ERROR; - } - conds[i]->value = OBJ_NEW(orte_data_value_t); - if (NULL == conds[i]->value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - goto RETURN_ERROR; - } - conds[i]->value->type = conditions[i]->value->type; - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((conds[i]->value)->data), conditions[i]->value->data, conds[i]->value->type))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - } - - /* get the answer */ - if (ORTE_SUCCESS != (ret = orte_gpr_replica_get_conditional_fn(addr_mode, seg, - tokentags, num_tokens, keytags, num_keys, - num_conditions, conds, - &cnt, &values))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - RETURN_ERROR: - - /* pack the number of values */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &cnt, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - ret = rc; - } - - /* pack the answer into the output output_buffer */ - if (0 < cnt) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, values, cnt, ORTE_GPR_VALUE))) { - ORTE_ERROR_LOG(rc); - ret = rc; - } - } - - if (NULL != segment) { - free(segment); - } - - if (NULL != tokens) { - for (i=0; i status) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - } - OBJ_RELEASE(buffer); -} - -/* - * handle message from proxies - */ - -void orte_gpr_replica_recv(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata) -{ - orte_buffer_t *answer; - int 
rc; - - OPAL_TRACE(3); - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr replica: received message from %s", - ORTE_NAME_PRINT(orte_process_info.my_name), ORTE_NAME_PRINT(sender)); - } - - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - if (ORTE_SUCCESS == orte_gpr_replica_process_command_buffer(buffer, sender, &answer)) { - if (0 > orte_rml.send_buffer_nb(sender, answer, tag, 0, orte_gpr_replica_send_cb, NULL)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - } - } - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "gpr replica: msg processing complete - processing callbacks"); - } - - /* be sure to process callbacks */ - if (!orte_gpr_replica.processing_callbacks) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_process_callbacks())) { - ORTE_ERROR_LOG(rc); - } - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return; -} diff --git a/orte/mca/gpr/replica/communications/gpr_replica_remote_msg.c b/orte/mca/gpr/replica/communications/gpr_replica_remote_msg.c deleted file mode 100644 index 6931e25908..0000000000 --- a/orte/mca/gpr/replica/communications/gpr_replica_remote_msg.c +++ /dev/null @@ -1,108 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_types.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -static void orte_gpr_replica_remote_send_cb( - int status, - orte_process_name_t* peer, - orte_buffer_t* req, - orte_rml_tag_t tag, - void* cbdata) -{ - OBJ_RELEASE(req); - return; -} - -int orte_gpr_replica_remote_notify(orte_process_name_t *recipient, - orte_gpr_notify_message_t *message) -{ - orte_buffer_t * buffer; - orte_gpr_cmd_flag_t command; - int rc; - - OPAL_TRACE(3); - - command = ORTE_GPR_NOTIFY_CMD; - - buffer = OBJ_NEW(orte_buffer_t); - if(NULL == buffer) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &message, 1, ORTE_GPR_NOTIFY_MSG))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - - if (0 > orte_rml.send_buffer_nb(recipient, buffer, ORTE_RML_TAG_GPR_NOTIFY, 0, - orte_gpr_replica_remote_send_cb, NULL)) { -#if 0 - /* temporarily disable this error report - * With the new orted-failed-to-start code, we hold a caller in - * the rmgr.spawn function until either the app launches or - * it fails. Failure is indicated by a subscription to NUM_TERMINATED. - * However, that means that a notify_msg is going to get sent to a - * remote process during comm_spawn once all procs terminate. 
Since - * that process will have terminated, and the HNP processes the trigger - * first, the notify_msg send will fail as the recipient will have - * terminated and exited. - * - * A proper fix will require that we do something different - * in rmgr_proxy.spawn so we don't get a callback after the - * process is done - */ - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - opal_output(0, "send failed to %s", ORTE_NAME_PRINT(recipient)); - orte_dss.dump(0, message, ORTE_GPR_NOTIFY_MSG); - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - return ORTE_ERR_COMM_FAILURE; -#endif - } - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/communications/gpr_replica_subscribe_cm.c b/orte/mca/gpr/replica/communications/gpr_replica_subscribe_cm.c deleted file mode 100644 index 590bc7a488..0000000000 --- a/orte/mca/gpr/replica/communications/gpr_replica_subscribe_cm.c +++ /dev/null @@ -1,209 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/util/proc_info.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -int orte_gpr_replica_recv_subscribe_cmd(orte_process_name_t* sender, - orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_SUBSCRIBE_CMD; - int rc, ret; - orte_std_cntr_t n, num_subs, num_trigs; - orte_gpr_trigger_t **trigs=NULL; - orte_gpr_subscription_t **subscriptions=NULL; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* get the number of subscriptions */ - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &num_subs, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - /* create the space for the subscriptions, if any are there - and unpack them */ - if (0 < num_subs) { - subscriptions = (orte_gpr_subscription_t**)malloc(num_subs * sizeof(orte_gpr_subscription_t*)); - if (NULL == subscriptions) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto RETURN_ERROR; - } - n = num_subs; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, subscriptions, &n, ORTE_GPR_SUBSCRIPTION))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - num_subs = n; - } - - /* get the number of triggers */ - n=1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &num_trigs, &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - /* create the space for the triggers, if any are there - and unpack them */ - if (0 < num_trigs) { - trigs = (orte_gpr_trigger_t**)malloc(num_trigs * 
sizeof(orte_gpr_trigger_t*)); - if (NULL == trigs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto RETURN_ERROR; - } - n = num_trigs; - if (ORTE_SUCCESS != orte_dss.unpack(input_buffer, trigs, &n, ORTE_GPR_TRIGGER)) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - num_trigs = n; - } - - /* register subscriptions */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_subscribe_fn(sender, - num_subs, subscriptions, - num_trigs, trigs))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_check_events())) { - ORTE_ERROR_LOG(rc); - } - - RETURN_ERROR: - /* release the subscription objects, if any */ - if (NULL != subscriptions) { - for (n=0; n < num_subs; n++) OBJ_RELEASE(subscriptions[n]); - if (NULL != subscriptions) free(subscriptions); - } - /* release the trigger objects, if any */ - if (NULL != trigs) { - for (n=0; n < num_trigs; n++) OBJ_RELEASE(trigs[n]); - if (NULL != trigs) free(trigs); - } - - if (ORTE_SUCCESS != (ret = orte_dss.pack(output_buffer, &rc, 1, ORTE_INT))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - return rc; -} - -int orte_gpr_replica_recv_unsubscribe_cmd(orte_process_name_t *sender, - orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_UNSUBSCRIBE_CMD; - orte_gpr_subscription_id_t sub_number=0; - int rc, ret; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &sub_number, &n, - ORTE_GPR_SUBSCRIPTION_ID))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - ret = orte_gpr_replica_remove_subscription(sender, sub_number); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - } - -RETURN_ERROR: - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &ret, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return 
ret; -} - - -int orte_gpr_replica_recv_cancel_trigger_cmd(orte_process_name_t *sender, - orte_buffer_t *input_buffer, - orte_buffer_t *output_buffer) -{ - orte_gpr_cmd_flag_t command=ORTE_GPR_CANCEL_TRIGGER_CMD; - orte_gpr_trigger_id_t trig_number=0; - int rc, ret; - orte_std_cntr_t n; - - OPAL_TRACE(3); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &command, 1, ORTE_GPR_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &trig_number, &n, - ORTE_GPR_TRIGGER_ID))) { - ORTE_ERROR_LOG(rc); - ret = rc; - goto RETURN_ERROR; - } - - ret = orte_gpr_replica_remove_trigger(sender, trig_number); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - } - -RETURN_ERROR: - if (ORTE_SUCCESS != (rc = orte_dss.pack(output_buffer, &ret, 1, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ret; -} - diff --git a/orte/mca/gpr/replica/configure.m4 b/orte/mca/gpr/replica/configure.m4 deleted file mode 100644 index b55c423b97..0000000000 --- a/orte/mca/gpr/replica/configure.m4 +++ /dev/null @@ -1,15 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2007 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_gpr_proxy_CONFIG(action-if-can-compile, -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_gpr_replica_CONFIG],[$1]) diff --git a/orte/mca/gpr/replica/configure.params b/orte/mca/gpr/replica/configure.params deleted file mode 100644 index 75702e5709..0000000000 --- a/orte/mca/gpr/replica/configure.params +++ /dev/null @@ -1,25 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_CONFIG_FILES="Makefile api_layer/Makefile transition_layer/Makefile functional_layer/Makefile communications/Makefile" -PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/gpr/replica/functional_layer/Makefile.am b/orte/mca/gpr/replica/functional_layer/Makefile.am deleted file mode 100644 index 5fee6ad971..0000000000 --- a/orte/mca/gpr/replica/functional_layer/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -noinst_LTLIBRARIES = libmca_gpr_replica_fn.la -libmca_gpr_replica_fn_la_SOURCES = \ - gpr_replica_fn.h \ - gpr_replica_arithmetic_ops_fn.c \ - gpr_replica_cleanup_fn.c \ - gpr_replica_del_index_fn.c \ - gpr_replica_dict_fn.c \ - gpr_replica_dump_fn.c \ - gpr_replica_local_trig_ops_fn.c \ - gpr_replica_messaging_fn.c \ - gpr_replica_put_get_fn.c \ - gpr_replica_segment_fn.c \ - gpr_replica_subscribe_fn.c \ - gpr_replica_trig_ops_fn.c diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_arithmetic_ops_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_arithmetic_ops_fn.c deleted file mode 100644 index 6104c5df42..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_arithmetic_ops_fn.c +++ /dev/null @@ -1,330 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/mca/ns/ns.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" - -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - -static int add_zero_itagval(orte_gpr_replica_itagval_t **iptr, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itag_t keytag, - orte_data_type_t type); - - -int orte_gpr_replica_arith_op_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *tokentags, - orte_std_cntr_t num_tokens, - orte_std_cntr_t cnt, - orte_gpr_replica_itag_t *keytags, - orte_dss_arith_op_t op_flag, - orte_data_value_t *operand) -{ - orte_gpr_replica_container_t **cptr, *cptr2; - orte_gpr_replica_addr_mode_t tok_mode; - orte_gpr_replica_itagval_t **ival, *ivptr; - int rc; - orte_std_cntr_t j, k, m, n; - - OPAL_TRACE(2); - - /** extract the token address mode */ - tok_mode = 0x004f & addr_mode; - if (0x00 == tok_mode) { /** default tokens addressing mode to AND */ - tok_mode = ORTE_GPR_REPLICA_AND; - } - - /** find the specified container(s) */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_containers(seg, tok_mode, - tokentags, num_tokens))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 == orte_gpr_replica_globals.num_srch_cptr) { - /* no container found - let's create the container */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_container(&cptr2, seg, - num_tokens, tokentags))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* for each provided value... 
*/ - for (n=0; n < cnt; n++) { - /* store "zero" value for that key in the container - since we already - * have converted keys into itags, all we need to do is store the data - * in the container - */ - if (ORTE_SUCCESS != (rc = add_zero_itagval(&ivptr, seg, cptr2, keytags[n], operand->type))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* now perform the desired operation on *that* value */ - if (ORTE_SUCCESS != (rc = orte_dss.arith(ivptr->value, operand, op_flag))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - /* all done */ - return ORTE_SUCCESS; - } - - /* otherwise, go through list of containers. For each one, - * find the entry and then perform operation - */ - cptr = (orte_gpr_replica_container_t**)(orte_gpr_replica_globals.srch_cptr)->addr; - for (j=0, m=0; m < orte_gpr_replica_globals.num_srch_cptr && - j < (orte_gpr_replica_globals.srch_cptr)->size; j++) { /* for each container */ - if (NULL != cptr[j]) { - m++; - if (ORTE_SUCCESS == orte_gpr_replica_search_container(ORTE_GPR_REPLICA_OR, keytags, cnt, cptr[j])) { - if (0 < orte_gpr_replica_globals.num_srch_ival) { - /* if one or more of the keyvals were found */ - ival = (orte_gpr_replica_itagval_t**)((orte_gpr_replica_globals.srch_ival)->addr); - for (k=0, n=0; n < orte_gpr_replica_globals.num_srch_ival && - k < (orte_gpr_replica_globals.srch_ival)->size; k++) { /* for each found keyval */ - if (NULL != ival[k]) { - n++; - if (ORTE_SUCCESS != (rc = orte_dss.arith(ival[k]->value, operand, op_flag))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - } else { - /* if none of the keyvals were found, then add them and perform the operation */ - for (n=0; n < cnt; n++) { - /* store "zero" value for that key in the container - since we already - * have converted keys into itags, all we need to do is store the data - * in the container - */ - if (ORTE_SUCCESS != (rc = add_zero_itagval(&ivptr, seg, cptr[j], keytags[n], operand->type))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* now perform the desired 
operation on *that* value */ - if (ORTE_SUCCESS != (rc = orte_dss.arith(ivptr->value, operand, op_flag))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - } - - return ORTE_SUCCESS; -} - -int orte_gpr_replica_increment_value_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *tokentags, - orte_std_cntr_t num_tokens, orte_std_cntr_t cnt, - orte_gpr_keyval_t **keyvals) -{ - orte_gpr_replica_container_t **cptr; - orte_gpr_replica_itag_t itag; - orte_gpr_replica_addr_mode_t tok_mode; - orte_gpr_replica_itagval_t **ival; - int rc; - orte_std_cntr_t i, j, k, m, n; - - OPAL_TRACE(2); - - /* extract the token address mode */ - tok_mode = ORTE_GPR_REPLICA_TOKMODE(addr_mode); - if (0x00 == tok_mode) { /* default tokens addressing mode to AND */ - tok_mode = ORTE_GPR_REPLICA_AND; - } - - /* find the specified container(s) */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_containers(seg, tok_mode, - tokentags, num_tokens))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 == orte_gpr_replica_globals.num_srch_cptr) { /* nothing found */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* otherwise, go through list of containers. 
For each one, - find the entry and then add one to its value */ - cptr = (orte_gpr_replica_container_t**)(orte_gpr_replica_globals.srch_cptr)->addr; - for (j=0, m=0; m < orte_gpr_replica_globals.num_srch_cptr && - j < (orte_gpr_replica_globals.srch_cptr)->size; j++) { /* for each container */ - if (NULL != cptr[j]) { - m++; - for (i=0; i < cnt; i++) { /* for each provided keyval to be incremented */ - if (ORTE_SUCCESS == orte_gpr_replica_dict_lookup(&itag, seg, keyvals[i]->key) && - ORTE_SUCCESS == orte_gpr_replica_search_container( - ORTE_GPR_REPLICA_OR, &itag, 1, cptr[j]) && - 0 < orte_gpr_replica_globals.num_srch_ival) { - ival = (orte_gpr_replica_itagval_t**)((orte_gpr_replica_globals.srch_ival)->addr); - for (k=0, n=0; n < orte_gpr_replica_globals.num_srch_ival && - k < (orte_gpr_replica_globals.srch_ival)->size; k++) { /* for each found keyval */ - if (NULL != ival[k]) { - n++; - if (ORTE_SUCCESS != (rc = orte_dss.increment(ival[k]->value))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - } - } - } - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_decrement_value_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *tokentags, - orte_std_cntr_t num_tokens, orte_std_cntr_t cnt, - orte_gpr_keyval_t **keyvals) -{ - orte_gpr_replica_container_t **cptr; - orte_gpr_replica_itag_t itag; - orte_gpr_replica_addr_mode_t tok_mode; - orte_gpr_replica_itagval_t **ival; - int rc; - orte_std_cntr_t i, j, k, m, n; - - OPAL_TRACE(2); - - /* extract the token address mode */ - tok_mode = ORTE_GPR_REPLICA_TOKMODE(addr_mode); - if (0x00 == tok_mode) { /* default tokens addressing mode to AND */ - tok_mode = ORTE_GPR_REPLICA_AND; - } - - /* find the specified container(s) */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_containers(seg, tok_mode, - tokentags, num_tokens))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 == orte_gpr_replica_globals.num_srch_cptr) { /* nothing found */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - 
return ORTE_ERR_NOT_FOUND; - } - - /* otherwise, go through list of containers. For each one, - find the entry and then subtract one from its value */ - cptr = (orte_gpr_replica_container_t**)(orte_gpr_replica_globals.srch_cptr)->addr; - for (j=0, m=0; m < orte_gpr_replica_globals.num_srch_cptr && - j < (orte_gpr_replica_globals.srch_cptr)->size; j++) { /* for each container */ - if (NULL != cptr[j]) { - m++; - for (i=0; i < cnt; i++) { /* for each provided keyval to be incremented */ - if (ORTE_SUCCESS == orte_gpr_replica_dict_lookup(&itag, seg, keyvals[i]->key) && - ORTE_SUCCESS == orte_gpr_replica_search_container( - ORTE_GPR_REPLICA_OR, &itag, 1, cptr[j]) && - 0 < orte_gpr_replica_globals.num_srch_ival) { - ival = (orte_gpr_replica_itagval_t**)((orte_gpr_replica_globals.srch_ival)->addr); - for (k=0, n=0; n < orte_gpr_replica_globals.num_srch_ival && - k < (orte_gpr_replica_globals.srch_ival)->size; k++) { /* for each found keyval */ - if (NULL != ival[k]) { - n++; - if (ORTE_SUCCESS != (rc = orte_dss.decrement(ival[k]->value))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } else { - return ORTE_ERR_NOT_FOUND; - } - } - } - } - return ORTE_SUCCESS; -} - -static int add_zero_itagval(orte_gpr_replica_itagval_t **iptr, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itag_t keytag, - orte_data_type_t op_type) -{ - orte_data_type_t type; - orte_data_value_t zval = ORTE_DATA_VALUE_EMPTY; - int8_t z8=0; - int16_t z16=0; - int32_t z32=0; - int64_t z64=0; - int rc; - - zval.type = op_type; /* make the type match */ - - /* get the corresponding value - must ensure size matches */ - type = op_type; - if (ORTE_STD_CNTR == type) { - type = ORTE_STD_CNTR_T; - } - - switch (type) { - case ORTE_UINT8: - case ORTE_INT8: - zval.data = (void*)&z8; - break; - - case ORTE_UINT16: - case ORTE_INT16: - zval.data = (void*)&z16; - break; - - case ORTE_UINT32: - case ORTE_INT32: - zval.data = (void*)&z32; - break; - - case ORTE_UINT64: - 
case ORTE_INT64: - zval.data = (void*)&z64; - break; - } - - /* add the zero value */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_add_itagval(iptr, seg, cptr, keytag, &zval))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_cleanup_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_cleanup_fn.c deleted file mode 100644 index dab95ec1bd..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_cleanup_fn.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/class/orte_pointer_array.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/ns/ns.h" - -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" - -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - - -int orte_gpr_replica_cleanup_job_fn(orte_jobid_t jobid) -{ - int rc; - char *jobidstring, *segment; - orte_gpr_replica_segment_t *seg; - - OPAL_TRACE(2); - - if (ORTE_SUCCESS != orte_ns.convert_jobid_to_string(&jobidstring, jobid)) { - return ORTE_ERR_BAD_PARAM; - } - - asprintf(&segment, "%s-%s", ORTE_JOB_SEGMENT, jobidstring); - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, false, segment))) { - return rc; - } - - /* delete the associated job segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_release_segment(&seg))) { - return rc; - } - - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_cleanup_proc_fn(orte_process_name_t *proc) -{ - orte_gpr_replica_segment_t *seg, **seg2; - orte_gpr_replica_container_t **cptr, *cptr2; - orte_gpr_replica_itag_t itag; - char *procname, *segment, *jobidstring; - orte_jobid_t jobid; - int rc; - orte_std_cntr_t i, j; - - OPAL_TRACE(2); - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr_replica_cleanup_proc: function entered for process %s", - ORTE_NAME_PRINT(orte_process_info.my_name), ORTE_NAME_PRINT(proc)); - } - - if (ORTE_SUCCESS != (rc = orte_ns.get_proc_name_string(&procname, proc))) { - return rc; - } - - /* find the job segment */ - jobid = proc->jobid; - - if (ORTE_SUCCESS != orte_ns.convert_jobid_to_string(&jobidstring, jobid)) { - return ORTE_ERR_BAD_PARAM; - } - - asprintf(&segment, "%s-%s", ORTE_JOB_SEGMENT, jobidstring); - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, false, segment))) { - return rc; - } - - /* find the container for this proc */ - if (ORTE_SUCCESS != (rc = 
orte_gpr_replica_dict_lookup(&itag, seg, procname))) { - return rc; - } - cptr = (orte_gpr_replica_container_t**)((seg->containers)->addr); - cptr2 = NULL; - for (i=0; i < (seg->containers)->size && NULL == cptr2; i++) { - if (NULL != cptr[i]) { - for (j=0; j < cptr[i]->num_itags && NULL == cptr2; j++) { - if (itag == cptr[i]->itags[j]) { - cptr2 = cptr[i]; - } - } - } - } - - if (NULL == cptr2) { /* container not found */ - return ORTE_ERR_BAD_PARAM; - } - - /* remove the container */ - orte_gpr_replica_release_container(seg, cptr2); - - /* search all segments for this process name - remove all references - */ - seg2 = (orte_gpr_replica_segment_t**)((orte_gpr_replica.segments)->addr); - for (i=0; i < (orte_gpr_replica.segments)->size; i++) { - if (NULL != seg2[i]) { - if (ORTE_SUCCESS == orte_gpr_replica_dict_lookup(&itag, seg2[i], procname)) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_purge_itag(seg2[i], itag))) { - return rc; - } - } - } - } - - return ORTE_SUCCESS; - -} diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_del_index_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_del_index_fn.c deleted file mode 100644 index d088aede25..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_del_index_fn.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "orte/util/proc_info.h" -#include "opal/util/trace.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - - -int orte_gpr_replica_delete_entries_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *token_itags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *key_itags, orte_std_cntr_t num_keys) -{ - orte_gpr_replica_container_t **cptr; - orte_gpr_replica_itagval_t **ivals; - orte_gpr_replica_addr_mode_t tok_mode; - orte_std_cntr_t i, j, k, n, p; - int rc; - - OPAL_TRACE(2); - - /* if num_tokens == 0 and num_keys == 0, remove segment. We don't record - * any actions when doing this so that subscriptions don't fire like mad - */ - if (0 == num_tokens && 0 == num_keys) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_release_segment(&seg))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - /* initialize storage for actions taken */ - orte_pointer_array_clear(orte_gpr_replica_globals.acted_upon); - orte_gpr_replica_globals.num_acted_upon = 0; - - /* extract the token address mode */ - tok_mode = ORTE_GPR_REPLICA_TOKMODE(addr_mode); - if (0x00 == tok_mode) { /* default tokens addressing mode to AND */ - tok_mode = ORTE_GPR_REPLICA_AND; - } - - /* find the specified container(s) */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_containers(seg, tok_mode, - token_itags, num_tokens))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (0 == orte_gpr_replica_globals.num_srch_cptr) { - /* nothing found - no ERROR_LOG entry created as this is - * not a system failure. Likewise, don't return an error code - * as this is not necessarily an error - don't want to cause - * somebody to abort as a result. 
- */ - return ORTE_SUCCESS; - } - - /* go through the containers looking for the specified entries, - * removing those that are found - */ - cptr = (orte_gpr_replica_container_t**)(orte_gpr_replica_globals.srch_cptr)->addr; - for (j=0, k=0; k < orte_gpr_replica_globals.num_srch_cptr && - j < (orte_gpr_replica_globals.srch_cptr)->size; j++) { - if (NULL != cptr[j]) { - k++; - /* If no keys are provided, then remove entire container */ - if (0 < num_tokens && 0 == num_keys){ - rc = orte_gpr_replica_release_container(seg, cptr[j]); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - else if( 0 < num_keys) { - for (i=0; i < num_keys; i++) { /* for each provided key */ - if (ORTE_SUCCESS == orte_gpr_replica_search_container( - ORTE_GPR_REPLICA_OR, - key_itags, 1, cptr[j])) { - if (0 < orte_gpr_replica_globals.num_srch_ival) { - /* found this key at least once - delete all - * occurrences - */ - ivals = (orte_gpr_replica_itagval_t**) - (orte_gpr_replica_globals.srch_ival)->addr; - for (n=0, p=0; p < orte_gpr_replica_globals.num_srch_ival && - n < (orte_gpr_replica_globals.srch_ival)->size; n++) { - if (NULL != ivals[n]) { - p++; - if (ORTE_SUCCESS != (rc = orte_gpr_replica_delete_itagval(seg, cptr[j], ivals[n]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if ( 0 == ((cptr[j])->itagvals)->size) { - /* If container is empty, remove it */ - rc = orte_gpr_replica_release_container(seg, cptr[j]); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - } - } - } - } - } - } - - return ORTE_SUCCESS; -} - -int orte_gpr_replica_delete_entries_nb_fn( - orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *token_itags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *key_tags, orte_std_cntr_t num_keys) -{ - OPAL_TRACE(2); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - - -int orte_gpr_replica_index_fn(orte_gpr_replica_segment_t *seg, - orte_std_cntr_t *cnt, char ***index) -{ - char **ptr; - 
orte_gpr_replica_segment_t **segs; - orte_gpr_replica_dict_entry_t **dict; - orte_std_cntr_t i, j; - - OPAL_TRACE(2); - - /* set default responses */ - *index = NULL; - *cnt = 0; - - if (NULL == seg) { /* looking for index of global registry */ - /* it is possible that no segments might exist - for example, if someone - * requested an index immediately after system start. Protect against - * that case - */ - if (0 == orte_gpr_replica.num_segs) { - return ORTE_SUCCESS; - } - *index = (char**)malloc(orte_gpr_replica.num_segs * sizeof(char*)); - if (NULL == *index) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - ptr = *index; - segs = (orte_gpr_replica_segment_t**) (orte_gpr_replica.segments)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_segs && - i < (orte_gpr_replica.segments)->size; i++) { - if (NULL != segs[i]) { - ptr[j] = strdup(segs[i]->name); - if (NULL == ptr[j]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - *cnt = j; - return ORTE_ERR_OUT_OF_RESOURCE; - } - j++; - } - } - *cnt = orte_gpr_replica.num_segs; - return ORTE_SUCCESS; - } - - /* must have requested index of a specific segment */ - if (0 < seg->num_dict_entries) { - *index = (char**)malloc(orte_gpr_replica.num_segs * sizeof(char*)); - if (NULL == *index) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - ptr = *index; - dict = (orte_gpr_replica_dict_entry_t**)(seg->dict)->addr; - - for( i = 0, j = 0; ((j < seg->num_dict_entries) && - (i < (seg->dict)->size)); i++ ) { - if(NULL == dict[i]) continue; - ptr[j] = strdup(dict[i]->string); - if (NULL == ptr[j]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - *cnt = j; - return ORTE_ERR_OUT_OF_RESOURCE; - } - j++; - } - *cnt = seg->num_dict_entries; - return ORTE_SUCCESS; - } - - /* it's okay if there are no entries, so return success */ - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_index_nb_fn(orte_gpr_replica_segment_t *seg, - orte_gpr_notify_cb_fn_t cbfunc, void 
*user_tag) -{ - OPAL_TRACE(2); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_dict_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_dict_fn.c deleted file mode 100644 index 420da3e7b8..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_dict_fn.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - support functions. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - -/* - */ -bool orte_gpr_replica_check_itag_list( orte_gpr_replica_addr_mode_t addr_mode, - orte_std_cntr_t num_itags_search, - orte_gpr_replica_itag_t *itags, - orte_std_cntr_t num_itags_entry, - orte_gpr_replica_itag_t *entry_itags ) -{ - orte_std_cntr_t i, j; - bool exclusive, match, not_set; - int found_some; - - OPAL_TRACE(3); - - /* check for trivial case */ - if (NULL == itags || 0 == num_itags_search) { /* wildcard case - automatically true */ - return true; - } - - if (ORTE_GPR_REPLICA_NOT & addr_mode) { /* NOT flag set */ - not_set = true; - } else { - not_set = false; - } - - if (ORTE_GPR_REPLICA_XAND & addr_mode || ORTE_GPR_REPLICA_XOR & addr_mode) { - exclusive = true; - } else { - exclusive = false; - } - - /* run the search - check the container's tags to see which search tags are found */ - found_some = 0; - for (i=0; i < num_itags_entry; i++) { /* for each container tag */ - match = false; - for (j=0; j < num_itags_search; j++) { /* check each search tag and see if it is present */ - if (entry_itags[i] == itags[j]) { /* found a match */ - if (ORTE_GPR_REPLICA_OR & addr_mode) { /* only need one match */ - return (!not_set); - } - match = true; - found_some++; - break; /* we're done for this j */ - } - } - if (!match && exclusive) { - /* if it was exclusive, then I'm not allowed to have any tags outside - * of those in the search list. Since I checked the search list and - * found at least one that didn't match, this violates the exclusive requirement. - */ - return (not_set); - } - } - - /* If we get here, then we know we have passed the exclusive test. We also know - * that we would have already returned in the OR case. 
So, first check the XOR - * case - */ - if ((ORTE_GPR_REPLICA_XOR & addr_mode) && (0 < found_some) ) { - return (!not_set); - } - - /* As we counted the number of matched itags we can simply compare this - * number with the total number of itags for the AND operation. - */ - if( found_some != num_itags_search ) { - return (not_set); - } - - /* okay, all the tags are there, so we now passed the AND test */ - return (!not_set); -} - - -int orte_gpr_replica_copy_itag_list(orte_gpr_replica_itag_t **dest, - orte_gpr_replica_itag_t *src, orte_std_cntr_t num_itags) -{ - if (0 == num_itags || NULL == src) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - OPAL_TRACE(3); - - *dest = (orte_gpr_replica_itag_t*)malloc(num_itags * sizeof(orte_gpr_replica_itag_t)); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - memcpy(*dest, src, num_itags*sizeof(orte_gpr_replica_itag_t)); - return ORTE_SUCCESS; -} - diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_dump_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_dump_fn.c deleted file mode 100644 index 992ff2dab6..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_dump_fn.c +++ /dev/null @@ -1,889 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "orte/dss/dss.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/smr/smr_types.h" - -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" -#include "gpr_replica_fn.h" - -static void orte_gpr_replica_dump_load_string(orte_buffer_t *buffer, char **tmp); - -void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer, - orte_gpr_replica_itagval_t *iptr); - -static int orte_gpr_replica_get_segment_size_fn(size_t *segsize, orte_gpr_replica_segment_t *seg); - - -int orte_gpr_replica_dump_all_fn(orte_buffer_t *buffer) -{ - char tmp_out[80], *tmp; - int rc; - - tmp = tmp_out; - sprintf(tmp_out, "\n\n\nDUMP OF GENERAL PURPOSE REGISTRY"); - orte_gpr_replica_dump_load_string(buffer, &tmp); - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer, 0))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer, 0))) { - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_callbacks_fn(buffer))) { - return rc; - } - - rc = orte_gpr_replica_dump_segments_fn(buffer, NULL); - - return rc; -} - -int orte_gpr_replica_dump_segments_fn(orte_buffer_t *buffer, char *segment) -{ - orte_gpr_replica_segment_t **seg, *segptr; - orte_std_cntr_t i, m; - int rc; - - /* if segment = NULL, loop through all segments */ - if (NULL == segment) { - seg = (orte_gpr_replica_segment_t**)(orte_gpr_replica.segments)->addr; - for (i=0, m=0; m < orte_gpr_replica.num_segs && - i < (orte_gpr_replica.segments)->size; i++) { - if (NULL != seg[i]) { - m++; - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_a_segment_fn(buffer, seg[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - return ORTE_SUCCESS; - } - - /* otherwise, dump just the one specified */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&segptr, 
false, segment))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_a_segment_fn(buffer, segptr))) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; - } - - int orte_gpr_replica_dump_a_segment_fn(orte_buffer_t *buffer, orte_gpr_replica_segment_t *seg) - { - orte_gpr_replica_container_t **cptr; - orte_gpr_replica_itag_t *itaglist; - orte_gpr_replica_itagval_t **iptr; - char *token; - orte_std_cntr_t num_objects; - orte_std_cntr_t j, k, n, p; - char *tmp_out; - - tmp_out = (char*)malloc(1000); - if (NULL == tmp_out) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - sprintf(tmp_out, "\nDUMP OF GPR SEGMENT %s", seg->name); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - num_objects = (seg->containers)->size - (seg->containers)->number_free; - - sprintf(tmp_out, "\tNumber of containers: %lu\n", - (unsigned long) num_objects); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - /* loop through all containers and print their info and contents */ - cptr = (orte_gpr_replica_container_t**)(seg->containers)->addr; - for (j=0, n=0; n < seg->num_containers && - j < (seg->containers)->size; j++) { - if (NULL != cptr[j]) { - n++; - sprintf(tmp_out, "\n\tInfo for container %lu" - "\tNumber of keyvals: %lu" - "\n\tTokens:\n", - (unsigned long) j, - ((unsigned long) (cptr[j]->itagvals)->size - (cptr[j]->itagvals)->number_free)); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - /* reverse lookup tokens and print them */ - itaglist = cptr[j]->itags; - for (k=0; k < cptr[j]->num_itags; k++) { - if (ORTE_SUCCESS != orte_gpr_replica_dict_reverse_lookup( - &token, seg, itaglist[k])) { - sprintf(tmp_out, "\t\titag num %lu" - ": No entry found for itag %lu", - (unsigned long) k, - (unsigned long) itaglist[k]); - } else { - sprintf(tmp_out, "\t\titag num %lu: itag %lu\tToken: %s", - (unsigned long) k, - (unsigned long) itaglist[k], token); - free(token); - } - 
orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - - sprintf(tmp_out, "\n\tKeyval info:"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - /* loop through all itagvals and print their info */ - iptr = (orte_gpr_replica_itagval_t**)(cptr[j]->itagvals)->addr; - for (k=0, p=0; p < cptr[j]->num_itagvals && - k < (cptr[j]->itagvals)->size; k++) { - if (NULL != iptr[k]) { - p++; - if (ORTE_SUCCESS != orte_gpr_replica_dict_reverse_lookup( - &token, seg, iptr[k]->itag)) { - sprintf(tmp_out, "\n\t\titag num %lu: No entry found for itag %lu", - (unsigned long) k, - (unsigned long) iptr[k]->itag); - } else { - sprintf(tmp_out, "\n\t\tEntry %lu: itag %lu\tKey: %s", - (unsigned long) k, - (unsigned long) iptr[k]->itag, token); - free(token); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - orte_gpr_replica_dump_itagval_value(buffer, iptr[k]); - } - } - } - } - - free(tmp_out); - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer) -{ - orte_gpr_replica_callbacks_t *cb; - orte_gpr_replica_action_taken_t **action; - orte_gpr_replica_itag_t *itaglist; - char *tmp_out, *token; - orte_std_cntr_t i, j, k; - - tmp_out = (char*)malloc(1000); - if (NULL == tmp_out) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - sprintf(tmp_out, "\nDUMP OF GPR REGISTERED CALLBACKS\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - if (0 >= (k = (orte_std_cntr_t)opal_list_get_size(&(orte_gpr_replica.callbacks)))) { - sprintf(tmp_out, "--- None registered at this time ---"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } else { - sprintf(tmp_out, "--- %lu callback(s) registered at this time", - (unsigned long) k); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - i=0; - for (cb = (orte_gpr_replica_callbacks_t*)opal_list_get_first(&(orte_gpr_replica.callbacks)); - cb != (orte_gpr_replica_callbacks_t*)opal_list_get_end(&(orte_gpr_replica.callbacks)); - cb = 
(orte_gpr_replica_callbacks_t*)opal_list_get_next(cb)) { - if (NULL == cb) { - sprintf(tmp_out, "\n\t--- BAD CALLBACK POINTER %lu ---", - (unsigned long) i); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - return ORTE_SUCCESS; - } - sprintf(tmp_out, "\nInfo for callback %lu", (unsigned long) i); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - if (NULL == cb->requestor) { - sprintf(tmp_out, "Local requestor"); - } else { - sprintf(tmp_out, "Requestor: %s", - ORTE_NAME_PRINT(cb->requestor)); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - orte_gpr_base_dump_notify_msg(buffer, cb->message); - i++; - } - } - - sprintf(tmp_out, "\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - if (0 < orte_gpr_replica_globals.num_acted_upon) { - sprintf(tmp_out, "\nDUMP OF GPR ACTION RECORDS\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - action = (orte_gpr_replica_action_taken_t**)orte_gpr_replica_globals.acted_upon->addr; - for (i=0, j=0; j < orte_gpr_replica_globals.num_acted_upon && - i < (orte_gpr_replica_globals.acted_upon)->size; i++) { - if (NULL != action[i]) { - j++; - if (NULL != action[i]->seg) { - sprintf(tmp_out, "\nAction Taken on Segment: %s", action[i]->seg->name); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } else { - sprintf(tmp_out, "\nAction Taken on NULL Segment"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (NULL != action[i]->cptr) { - sprintf(tmp_out, "\tContainer Tokens:"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - /* reverse lookup tokens and print them */ - itaglist = action[i]->cptr->itags; - for (k=0; k < action[i]->cptr->num_itags; k++) { - if (ORTE_SUCCESS != orte_gpr_replica_dict_reverse_lookup( - &token, action[i]->seg, itaglist[k])) { - sprintf(tmp_out, "\t\titag num %lu" - ": No entry found for itag %lu", - (unsigned long) k, - (unsigned long) itaglist[k]); - } else { - sprintf(tmp_out, "\t\titag num %lu: itag %lu\tToken: %s", - (unsigned 
long) k, - (unsigned long) itaglist[k], token); - free(token); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - } else { - sprintf(tmp_out, "\tNULL Container"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (NULL != action[i]->iptr) { - if (ORTE_GPR_REPLICA_ENTRY_ADDED & action[i]->action) { - sprintf(tmp_out, "\n\tKeyval ADDED:"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_REPLICA_ENTRY_DELETED & action[i]->action) { - sprintf(tmp_out, "\n\tKeyval DELETED:"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_REPLICA_ENTRY_CHANGED & action[i]->action) { - sprintf(tmp_out, "\n\tKeyval CHANGED"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_REPLICA_ENTRY_CHG_TO & action[i]->action) { - sprintf(tmp_out, "\t\tKeyval CHANGED TO:"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_REPLICA_ENTRY_CHG_FRM & action[i]->action) { - sprintf(tmp_out, "\t\tKeyval CHANGED FROM:"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - - if (ORTE_SUCCESS != orte_gpr_replica_dict_reverse_lookup( - &token, action[i]->seg, action[i]->iptr->itag)) { - sprintf(tmp_out, "\t\tNo entry found for itag %lu", - (unsigned long) action[i]->iptr->itag); - } else { - sprintf(tmp_out, "\t\titag %lu\tKey: %s", - (unsigned long) action[i]->iptr->itag, token); - free(token); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - orte_gpr_replica_dump_itagval_value(buffer, action[i]->iptr); - } else { - sprintf(tmp_out, "\tNULL Keyval"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - } - } - } else { - sprintf(tmp_out, "\nNO GPR ACTION RECORDS STORED\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - - free(tmp_out); - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer, - orte_gpr_trigger_id_t start) -{ - orte_gpr_replica_trigger_t **trig; - char tmp_out[100], *tmp; - 
orte_std_cntr_t j, k, m; - int rc; - - tmp = tmp_out; - sprintf(tmp_out, "\nDUMP OF GPR TRIGGERS\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp); - - trig = (orte_gpr_replica_trigger_t**)((orte_gpr_replica.triggers)->addr); - sprintf(tmp_out, "Number of triggers: %lu\n", (unsigned long) orte_gpr_replica.num_trigs); - orte_gpr_replica_dump_load_string(buffer, &tmp); - - /* dump the trigger info for the registry */ - if (0 == start) { /* dump the whole thing */ - m = 0; - } else { - m = orte_gpr_replica.num_trigs - start; - } - - for (j=0, k=0; k < orte_gpr_replica.num_trigs && - j < (orte_gpr_replica.triggers)->size; j++) { - if (NULL != trig[j]) { - if (k >= m) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(buffer, trig[j]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - k++; - } - } - - return ORTE_SUCCESS; -} - -int orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, - orte_gpr_replica_trigger_t *trig) -{ - char *tmp_out, *token; - orte_std_cntr_t i, j; - orte_gpr_replica_counter_t **cntr; - orte_gpr_replica_subscription_t **subs; - orte_gpr_replica_trigger_requestor_t **attached; - - tmp_out = (char*)malloc(1000); - if (NULL == tmp_out) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - sprintf(tmp_out, "\nData for trigger %lu", (unsigned long) trig->index); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - if (NULL == trig->name) { - sprintf(tmp_out, "\tNOT a named trigger"); - } else { - sprintf(tmp_out, "\ttrigger name: %s", trig->name); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - if (0 < trig->num_attached) { - sprintf(tmp_out, "\t%lu requestors attached to this trigger", - (unsigned long) trig->num_attached); - } else { - sprintf(tmp_out, "\tNo requestors attached to this trigger"); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - attached = (orte_gpr_replica_trigger_requestor_t**) - (trig->attached)->addr; - for (i=0, j=0; j < trig->num_attached && - i < 
(trig->attached)->size; i++) { - if (NULL != attached[i]) { - j++; - if (NULL == attached[i]->requestor) { - sprintf(tmp_out, "\t\tRequestor %lu: LOCAL@idtag %lu", - (unsigned long)j, (unsigned long)attached[i]->idtag); - } else { - sprintf(tmp_out, "\t\tRequestor %lu: %s@idtag %lu", - (unsigned long)j, ORTE_NAME_PRINT(attached[i]->requestor), - (unsigned long)attached[i]->idtag); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - } - - if (NULL == trig->master) { - sprintf(tmp_out, "\tNO MASTER registered"); - } else { - if (NULL == trig->master->requestor) { - sprintf(tmp_out, "\tTRIGGER MASTER: LOCAL@idtag %lu", - (unsigned long)trig->master->idtag); - } else { - sprintf(tmp_out, "\tTRIGGER MASTER: %s@idtag %lu", - ORTE_NAME_PRINT(trig->master->requestor), - (unsigned long)trig->master->idtag); - } - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - if (ORTE_GPR_TRIG_ONE_SHOT & trig->action) { - sprintf(tmp_out, "\tORTE_GPR_TRIG_ONE_SHOT"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_TRIG_AT_LEVEL & trig->action) { - sprintf(tmp_out, "\tORTE_GPR_TRIG_AT_LEVEL"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_TRIG_CMP_LEVELS & trig->action) { - sprintf(tmp_out, "\tORTE_GPR_TRIG_CMP_LEVELS"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS & trig->action) { - sprintf(tmp_out, "\tORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - - if (trig->one_shot_fired) { - sprintf(tmp_out, "\tONE SHOT HAS FIRED"); - } else { - sprintf(tmp_out, "\tONE SHOT HAS NOT FIRED"); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - if (0 < trig->num_counters) { - if (ORTE_GPR_TRIG_AT_LEVEL & trig->action) { - sprintf(tmp_out, "\tTrigger monitoring %lu counters for level", - (unsigned long) trig->num_counters); - } else { - sprintf(tmp_out, "\tTrigger monitoring %lu counters for compare", - (unsigned 
long) trig->num_counters); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - cntr = (orte_gpr_replica_counter_t**)((trig->counters)->addr); - for (i=0, j=0; j < trig->num_counters && - i < (trig->counters)->size; i++) { - if (NULL != cntr[i] && - ORTE_SUCCESS == orte_gpr_replica_dict_reverse_lookup(&token, cntr[i]->seg, - (cntr[i]->iptr)->itag)) { - j++; - sprintf(tmp_out, "\t\tCounter: %lu\tSegment: %s\tName: %s", - (unsigned long) i, (cntr[i]->seg)->name, token); - free(token); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - if (ORTE_GPR_TRIG_AT_LEVEL & trig->action) { - sprintf(tmp_out, "\t\tTrigger Level:"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - orte_gpr_replica_dump_itagval_value(buffer, &(cntr[i]->trigger_level)); - } - sprintf(tmp_out, "\t\tCurrent Value:"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - orte_gpr_replica_dump_itagval_value(buffer, cntr[i]->iptr); - } - } - } - - if (0 < trig->num_subscriptions) { - sprintf(tmp_out, "\tTrigger has %lu subscriptions attached to it", - (unsigned long) trig->num_subscriptions); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - subs = (orte_gpr_replica_subscription_t**)((trig->subscriptions)->addr); - for (i=0, j=0; j < trig->num_subscriptions && - i < (trig->subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - orte_gpr_replica_dump_subscription(buffer, subs[i]); - } - } - } - - free(tmp_out); - return ORTE_SUCCESS; -} - -int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer, - orte_gpr_subscription_id_t start) -{ - char *tmp_out, *tmp; - orte_std_cntr_t i, m, n; - orte_gpr_replica_subscription_t **subs; - int rc; - - tmp_out = (char*)malloc(1000); - if (NULL == tmp_out) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - tmp = tmp_out; - - sprintf(tmp_out, "\nDUMP OF GPR SUBSCRIPTIONS\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp); - - subs = 
(orte_gpr_replica_subscription_t**)((orte_gpr_replica.subscriptions)->addr); - sprintf(tmp_out, "Number of subscriptions: %lu\n", (unsigned long) orte_gpr_replica.num_subs); - orte_gpr_replica_dump_load_string(buffer, &tmp); - - /* dump the subscription info for the registry */ - if (0 == start) { /* dump the whole thing */ - n = 0; - } else { - n = orte_gpr_replica.num_subs - start; - } - - for (i=0, m=0; m < orte_gpr_replica.num_subs && - i < (orte_gpr_replica.subscriptions)->size; i++) { - if (NULL != subs[i]) { - if (m >= n) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(buffer, subs[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - m++; - } - } - free(tmp_out); - return ORTE_SUCCESS; -} - -int orte_gpr_replica_dump_subscription(orte_buffer_t *buffer, - orte_gpr_replica_subscription_t *sub) -{ - char *tmp_out, *token, *tmp; - orte_std_cntr_t j, k, n, p; - orte_gpr_replica_requestor_t **reqs; - orte_gpr_replica_ivalue_t **ivals; - - tmp_out = (char*)malloc(1000); - if (NULL == tmp_out) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - tmp = tmp_out; - - if (NULL == sub->name) { - sprintf(tmp, "\nSubscription %lu: UNNAMED idtag %lu", - (unsigned long) sub->index, (unsigned long) sub->idtag); - } else { - sprintf(tmp, "\nSubscription %lu: name %s idtag %lu", - (unsigned long) sub->index, - sub->name, (unsigned long) sub->idtag); - } - orte_gpr_replica_dump_load_string(buffer, &tmp); - - if (sub->active) { - sprintf(tmp_out, "\tSubscription ACTIVE"); - } else { - sprintf(tmp_out, "\tSubscription INACTIVE"); - } - orte_gpr_replica_dump_load_string(buffer, &tmp); - - if (sub->cleanup) { - sprintf(tmp_out, "\tSubscription scheduled for cleanup"); - } else { - sprintf(tmp_out, "\tSubscription NOT scheduled for cleanup"); - } - orte_gpr_replica_dump_load_string(buffer, &tmp); - - /* output recipient info */ - sprintf(tmp_out, "\tList of requestors for this subscription:"); - 
orte_gpr_replica_dump_load_string(buffer, &tmp); - reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr; - for (j=0, k=0; k < sub->num_requestors && - j < (sub->requestors)->size; j++) { - if (NULL != reqs[j]) { - k++; - if (NULL == reqs[j]->requestor) { - sprintf(tmp_out, "\t\tRequestor: LOCAL @ subscription id %lu", - (unsigned long) reqs[j]->idtag); - } else { - sprintf(tmp_out, "\t\tRequestor: %s @ subscription id %lu", - ORTE_NAME_PRINT(reqs[j]->requestor), - (unsigned long) reqs[j]->idtag); - } - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - } - - sprintf(tmp_out, "\tActions:"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - if (ORTE_GPR_NOTIFY_VALUE_CHG & sub->action) { - sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_VALUE_CHG"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } else if (ORTE_GPR_NOTIFY_VALUE_CHG_TO & sub->action) { - sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_VALUE_CHG_TO"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } else if (ORTE_GPR_NOTIFY_VALUE_CHG_FRM & sub->action) { - sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_VALUE_CHG_FRM"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_NOTIFY_DEL_ENTRY & sub->action) { - sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_DEL_ENTRY"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_NOTIFY_ADD_ENTRY & sub->action) { - sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_ADD_ENTRY"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_NOTIFY_PRE_EXISTING & sub->action) { - sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_PRE_EXISTING"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & sub->action) { - sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_STARTS_AFTER_TRIG"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & sub->action) { - sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_DELETE_AFTER_TRIG"); - 
orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - - sprintf(tmp_out, "\n\tData covered by this subscription"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - ivals = (orte_gpr_replica_ivalue_t**)(sub->values)->addr; - for (n=0, p=0; p < sub->num_values && - n < (sub->values)->size; n++) { - if (NULL != ivals[n]) { - p++; - sprintf(tmp_out, "\t\tData on segment %s", (ivals[n]->seg)->name); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - k = (int)orte_value_array_get_size(&(ivals[n]->tokentags)); - if (0 == k) { - sprintf(tmp_out, "\t\tNULL token (wildcard)"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } else { - sprintf(tmp_out, "\t\tNumber of tokens: %lu", - (unsigned long) k); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - for (j=0; j < k; j++) { - if (ORTE_SUCCESS == orte_gpr_replica_dict_reverse_lookup(&token, ivals[n]->seg, - ORTE_VALUE_ARRAY_GET_ITEM(&(ivals[n]->tokentags), orte_gpr_replica_itag_t, j))) { - sprintf(tmp_out, "\t\t\tToken: %s", token); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - free(token); - } - } - } - - sprintf(tmp_out, "\t\tToken addressing mode:\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - if (ORTE_GPR_TOKENS_NOT & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_NOT\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_TOKENS_AND & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_AND\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_TOKENS_OR & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_OR\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_TOKENS_XAND & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_XAND\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_TOKENS_XOR & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_XOR\n"); - 
orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - - k = (int)orte_value_array_get_size(&(ivals[n]->keytags)); - if (0 == k) { - sprintf(tmp_out, "\t\tNULL key (wildcard)"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } else { - sprintf(tmp_out, "\t\tNumber of keys: %lu", (unsigned long) k); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - for (j=0; j < k; j++) { - if (ORTE_SUCCESS == orte_gpr_replica_dict_reverse_lookup(&token, ivals[n]->seg, - ORTE_VALUE_ARRAY_GET_ITEM(&(ivals[n]->keytags), orte_gpr_replica_itag_t, j))) { - sprintf(tmp_out, "\t\t\tKey: %s", token); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - free(token); - } - } - } - - sprintf(tmp_out, "\t\tKey addressing mode:\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - - if (ORTE_GPR_KEYS_NOT & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_NOT\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_KEYS_AND & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_AND\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_KEYS_OR & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_OR\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_KEYS_XAND & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_XAND\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - if (ORTE_GPR_KEYS_XOR & ivals[n]->addr_mode) { - sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_XOR\n"); - orte_gpr_replica_dump_load_string(buffer, &tmp_out); - } - } /* if ivals[n] not NULL */ - } /* for n */ - - free(tmp_out); - return ORTE_SUCCESS; -} - - -void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer, - orte_gpr_replica_itagval_t *iptr) -{ - char *tmp; - int rc; - - if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp, "\t\t\t", iptr->value, ORTE_DATA_VALUE))) { - ORTE_ERROR_LOG(rc); - return; - } - - if (NULL == buffer) { - opal_output(0, "%s", tmp); - } else 
{ - orte_gpr_replica_dump_load_string(buffer, &tmp); - } - - free(tmp); -} - - -int orte_gpr_replica_dump_segment_size_fn(orte_buffer_t *buffer, char *segment) -{ - orte_gpr_replica_segment_t **seg, *segptr; - orte_std_cntr_t i, m; - size_t segsize, total; - char tmp[100], *tptr; - int rc; - - tptr = tmp; - - /* if segment = NULL, loop through all segments */ - if (NULL == segment) { - seg = (orte_gpr_replica_segment_t**)(orte_gpr_replica.segments)->addr; - total = 0; - for (i=0, m=0; m < orte_gpr_replica.num_segs && - i < (orte_gpr_replica.segments)->size; i++) { - if (NULL != seg[i]) { - m++; - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_segment_size_fn(&segsize, seg[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - total += segsize; - } - } - sprintf(tmp, "Total registry size: %lu bytes", (unsigned long)total); - orte_gpr_replica_dump_load_string(buffer, &tptr); - - return ORTE_SUCCESS; - } - - /* otherwise, get the size of just the one specified */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&segptr, false, segment))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_segment_size_fn(&segsize, segptr))) { - ORTE_ERROR_LOG(rc); - return rc; - } - sprintf(tmp, "Size of segment %s: %lu bytes", segment, (unsigned long)segsize); - orte_gpr_replica_dump_load_string(buffer, &tptr); - - return ORTE_SUCCESS; -} - - -static void orte_gpr_replica_dump_load_string(orte_buffer_t *buffer, char **tmp) -{ - orte_dss.pack(buffer, tmp, 1, ORTE_STRING); -} - -static int orte_gpr_replica_get_segment_size_fn(size_t *segsize, orte_gpr_replica_segment_t *seg) -{ - size_t data_size, isize; - orte_std_cntr_t i, j, k, m; - orte_gpr_replica_dict_entry_t **dict; - orte_gpr_replica_container_t **cptr; - orte_gpr_replica_itagval_t **iptr; - int rc; - - data_size = strlen(seg->name); - data_size += 2*sizeof(orte_gpr_replica_itag_t); /* itag, num_dict_entries */ - - data_size += (seg->dict)->size * sizeof(void*); /* account for size of 
pointer array */ - dict = (orte_gpr_replica_dict_entry_t**)(seg->dict)->addr; - for (i=0, j=0; j < seg->num_dict_entries && - i < (seg->dict)->size; i++) { - if (NULL != dict[i]) { - j++; - data_size += dict[i]->length + 1; - } - } - - data_size += sizeof(orte_std_cntr_t); /* num_containers */ - cptr = (orte_gpr_replica_container_t**)(seg->containers)->addr; - for (i=0, j=0; j < (seg->num_containers) && - i < (seg->containers)->size; i++) { - if (NULL != cptr[i]) { - j++; - data_size += sizeof(orte_std_cntr_t); /* index */ - data_size += cptr[i]->num_itags * sizeof(orte_gpr_replica_itag_t); /* itags array */ - data_size += sizeof(orte_std_cntr_t); /* num_itags */ - data_size += (cptr[i]->itagvals)->size * sizeof(void*); /* account for size of pointer array */ - data_size += sizeof(orte_std_cntr_t); /* num_itagvals */ - iptr = (orte_gpr_replica_itagval_t**)(cptr[i]->itagvals)->addr; - for (k=0, m=0; m < cptr[i]->num_itagvals && - k < (cptr[i]->itagvals)->size; k++) { - if (NULL != iptr[k]) { - m++; - data_size += sizeof(orte_std_cntr_t); /* index */ - data_size += sizeof(orte_gpr_replica_itag_t); - data_size += sizeof(orte_data_type_t); - if (ORTE_SUCCESS != (rc = orte_dss.size(&isize, iptr[k]->value->data, iptr[k]->value->type))) { - ORTE_ERROR_LOG(rc); - *segsize = 0; - return rc; - } - data_size += isize; - } - } - data_size += 3*sizeof(orte_std_cntr_t); - data_size += (cptr[i]->itaglist).array_size * sizeof(unsigned char*); - } - } - - *segsize = data_size; - return ORTE_SUCCESS; -} - diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h b/orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h deleted file mode 100644 index e95d4cbf1c..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - support functions. - * - */ - -#ifndef MCA_GPR_REPLICA_FN_H_ -#define MCA_GPR_REPLICA_FN_H_ - -#include "orte_config.h" - -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/gpr/replica/gpr_replica.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * The "fn" layer of the registry API functions - not accessible from outside - * the replica - */ - - /* - * Arithemetic operations - */ -int orte_gpr_replica_arith_op_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *itags, - orte_std_cntr_t num_tokens, - orte_std_cntr_t cnt, - orte_gpr_replica_itag_t *keytags, - orte_dss_arith_op_t op_flag, - orte_data_value_t *operand); - -int orte_gpr_replica_increment_value_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *itags, - orte_std_cntr_t num_tokens, orte_std_cntr_t cnt, - orte_gpr_keyval_t **keyvals); - -int orte_gpr_replica_decrement_value_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *itags, - orte_std_cntr_t num_tokens, orte_std_cntr_t cnt, - orte_gpr_keyval_t **keyvals); - -/* - * Delete-index functions - */ -int orte_gpr_replica_delete_entries_fn(orte_gpr_addr_mode_t mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *token_itags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *key_tags, orte_std_cntr_t num_keys); - -int orte_gpr_replica_delete_entries_nb_fn( - 
orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *token_itags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *key_tags, orte_std_cntr_t num_keys); - -int orte_gpr_replica_index_fn(orte_gpr_replica_segment_t *seg, - orte_std_cntr_t *cnt, char ***index); - -int orte_gpr_replica_index_nb_fn(orte_gpr_replica_segment_t *seg, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - - -/* - * Cleanup functions - */ -int orte_gpr_replica_cleanup_job_fn(orte_jobid_t jobid); - -int orte_gpr_replica_cleanup_proc_fn(orte_process_name_t *proc); - - -/* - * Put-get functions - */ -int orte_gpr_replica_put_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *token_itags, orte_std_cntr_t num_tokens, - orte_std_cntr_t cnt, orte_gpr_keyval_t **keyvals); - -int orte_gpr_replica_put_nb_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *token_itags, orte_std_cntr_t num_tokens, - orte_std_cntr_t cnt, orte_gpr_keyval_t **keyvals, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - -int orte_gpr_replica_get_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *tokentags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *keytags, orte_std_cntr_t num_keys, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values); - -int orte_gpr_replica_get_conditional_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *tokentags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *keytags, orte_std_cntr_t num_keys, - orte_std_cntr_t num_conditions, orte_gpr_replica_itagval_t **conditions, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values); - -int orte_gpr_replica_get_nb_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *tokentags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *keytags, orte_std_cntr_t num_keys, - 
orte_gpr_notify_cb_fn_t cbfunc, void *user_tag); - - -/* - * Subscribe functions - */ -int orte_gpr_replica_subscribe_fn(orte_process_name_t *requestor, - orte_std_cntr_t num_subs, - orte_gpr_subscription_t **subscriptions, - orte_std_cntr_t num_trigs, - orte_gpr_trigger_t **trigs); - -/* - * Diagnostic functions - */ -int orte_gpr_replica_dump_all_fn(orte_buffer_t *buffer); - -int orte_gpr_replica_dump_segments_fn(orte_buffer_t *buffer, char *segment); - -int orte_gpr_replica_dump_a_segment_fn(orte_buffer_t *buffer, orte_gpr_replica_segment_t *seg); - -int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer, - orte_gpr_trigger_id_t start); - -int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer, - orte_gpr_subscription_id_t start); - -int orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, - orte_gpr_replica_trigger_t *trig); - -int orte_gpr_replica_dump_subscription(orte_buffer_t *buffer, - orte_gpr_replica_subscription_t *sub); - -int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer); - -int orte_gpr_replica_dump_segment_size_fn(orte_buffer_t *buffer, char *segment); - -/* - * ********* INTERNAL UTILITY FUNCTIONS ********** - */ - -/** SEGMENT OPERATIONS - */ -int orte_gpr_replica_release_segment(orte_gpr_replica_segment_t **seg); - -int orte_gpr_replica_find_containers(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_addr_mode_t addr_mode, - orte_gpr_replica_itag_t *taglist, orte_std_cntr_t num_tags); - -int orte_gpr_replica_create_container(orte_gpr_replica_container_t **cptr, - orte_gpr_replica_segment_t *seg, - orte_std_cntr_t num_itags, - orte_gpr_replica_itag_t *itags); - -int orte_gpr_replica_release_container(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr); - -int orte_gpr_replica_add_keyval(orte_gpr_replica_itagval_t **ivalptr, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_keyval_t *kptr); - -int orte_gpr_replica_add_itagval(orte_gpr_replica_itagval_t **ivalptr, - 
orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itag_t itag, - orte_data_value_t *dval); - -int orte_gpr_replica_update_keyval(orte_gpr_replica_itagval_t **iptr, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_keyval_t *kptr); - - -int orte_gpr_replica_purge_itag(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t itag); - -int orte_gpr_replica_search_container(orte_gpr_replica_addr_mode_t addr_mode, - orte_gpr_replica_itag_t *itags, orte_std_cntr_t num_itags, - orte_gpr_replica_container_t *cptr); - -bool orte_gpr_replica_value_in_container(orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itagval_t *iptr); - -int orte_gpr_replica_delete_itagval(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itagval_t *iptr); - -/* - * DICTIONARY OPERATIONS - */ - -bool orte_gpr_replica_check_itag_list(orte_gpr_replica_addr_mode_t mode, - orte_std_cntr_t num_itags_search, - orte_gpr_replica_itag_t *itags, - orte_std_cntr_t num_itags_entry, - orte_gpr_replica_itag_t *entry_itags); - -int orte_gpr_replica_copy_itag_list(orte_gpr_replica_itag_t **dest, - orte_gpr_replica_itag_t *src, orte_std_cntr_t num_itags); - -void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer, - orte_gpr_replica_itagval_t *iptr); - -/* - * Trigger Operations - */ -int orte_gpr_replica_enter_local_subscription(orte_std_cntr_t cnt, orte_gpr_subscription_t **subscriptions); - -int orte_gpr_replica_enter_local_trigger(orte_std_cntr_t cnt, orte_gpr_trigger_t **trigs); - -int orte_gpr_replica_remove_local_subscription(orte_gpr_replica_local_subscriber_t *sub); - -int orte_gpr_replica_remove_local_trigger(orte_gpr_replica_local_trigger_t *trig); - -int orte_gpr_replica_record_action(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itagval_t *iptr, - orte_gpr_replica_action_t action); - -int orte_gpr_replica_check_events(void); - -int 
orte_gpr_replica_check_subscription(orte_gpr_replica_subscription_t *sub); - -bool orte_gpr_replica_check_notify_matches(orte_gpr_addr_mode_t *addr_mode, - orte_gpr_replica_subscription_t *sub, - orte_gpr_replica_action_taken_t *ptr); - -int orte_gpr_replica_check_trig(orte_gpr_replica_trigger_t *trig); - -int orte_gpr_replica_update_storage_locations(orte_gpr_replica_itagval_t *new_iptr); - -int orte_gpr_replica_construct_notify_message(orte_gpr_notify_message_t *msg, - orte_gpr_replica_trigger_t *trig, - orte_gpr_replica_subscription_t *sub, - orte_gpr_value_t *value); - -int -orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr, - orte_process_name_t *requestor, - orte_gpr_subscription_t *subscription); - -int -orte_gpr_replica_register_trigger(orte_gpr_replica_trigger_t **trigptr, - orte_process_name_t *requestor, - orte_gpr_trigger_t *trigger); - -int -orte_gpr_replica_remove_subscription(orte_process_name_t *requestor, - orte_gpr_subscription_id_t id); - -int -orte_gpr_replica_remove_trigger(orte_process_name_t *requestor, - orte_gpr_trigger_id_t id); - -int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub, - orte_gpr_value_t *value); - -int orte_gpr_replica_register_trigger_callback(orte_gpr_replica_trigger_t *trig); - -int orte_gpr_replica_define_callback(orte_gpr_notify_msg_type_t msg_type, - orte_gpr_replica_callbacks_t **cbptr, - orte_process_name_t *recipient); - -int orte_gpr_replica_process_callbacks(void); - -int orte_gpr_replica_purge_subscriptions(orte_process_name_t *proc); - -int orte_gpr_replica_store_value_in_msg(orte_gpr_replica_requestor_t *req, - orte_gpr_notify_message_t *msg, - char *sub_name, - orte_std_cntr_t cnt, - orte_gpr_value_t **values); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_local_trig_ops_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_local_trig_ops_fn.c deleted file mode 
100644 index 474330723e..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_local_trig_ops_fn.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - support functions. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/mca/gpr/replica/api_layer/gpr_replica_api.h" -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - - -/* FUNCTIONS REQUIRED FOR LOCAL SUBSCRIPTION AND TRIGGER - * REGISTRATION - */ -int -orte_gpr_replica_enter_local_subscription(orte_std_cntr_t cnt, orte_gpr_subscription_t **subscriptions) -{ - orte_gpr_replica_local_subscriber_t *sub; - orte_std_cntr_t i; - - OPAL_TRACE(2); - - for (i=0; i < cnt; i++) { - sub = OBJ_NEW(orte_gpr_replica_local_subscriber_t); - if (NULL == sub) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (NULL != subscriptions[i]->name) { - sub->name = strdup(subscriptions[i]->name); - } - sub->callback = subscriptions[i]->cbfunc; - sub->user_tag = subscriptions[i]->user_tag; - if (0 > orte_pointer_array_add(&sub->index, orte_gpr_replica_globals.local_subscriptions, sub)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - 
return ORTE_ERR_OUT_OF_RESOURCE; - } - sub->id = orte_gpr_replica_globals.num_local_subs; - subscriptions[i]->id = sub->id; - (orte_gpr_replica_globals.num_local_subs)++; - } - - return ORTE_SUCCESS; -} - - -int -orte_gpr_replica_enter_local_trigger(orte_std_cntr_t cnt, orte_gpr_trigger_t **trigs) -{ - orte_gpr_replica_local_trigger_t *trig, **tptr; - orte_std_cntr_t i, j, k; - - OPAL_TRACE(2); - - for (i=0; i < cnt; i++) { - /* If the provided trigger has a name, see if it already is on - * the local trigger list. If so, then check to see if we - * already defined a return point for it and/or if this trigger - * doesn't - in either of those two cases, we ignore the - * trigger and just use the existing entry - */ - if (NULL != trigs[i]->name) { - tptr = (orte_gpr_replica_local_trigger_t**)(orte_gpr_replica_globals.local_triggers)->addr; - for (j=0, k=0; k < orte_gpr_replica_globals.num_local_trigs && - j < (orte_gpr_replica_globals.local_triggers)->size; j++) { - if (NULL != tptr[j]) { - k++; - if (NULL != tptr[j]->name && 0 == strcmp(tptr[j]->name, trigs[i]->name)) { - /* same name - trigger is already on list */ - if (NULL != tptr[j]->callback || NULL == trigs[i]->cbfunc) { - /* ignore these cases */ - trig = tptr[j]; - goto MOVEON; - } - /* reach here if either the prior trigger didn't provide - * a callback, and the new one provides one. 
In this - * case, we update the existing trigger callback and then - * move on - */ - tptr[j]->callback = trigs[i]->cbfunc; - trig = tptr[j]; - goto MOVEON; - } - } - } - } - - /* either the trigger doesn't have a name, OR it did, but it isn't - * already on the list - add it to the list now - */ - trig = OBJ_NEW(orte_gpr_replica_local_trigger_t); - if (NULL == trig) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (NULL != trigs[i]->name) { - trig->name = strdup(trigs[i]->name); - } - /* ensure that the proper routing flag is set - * in the action field to match the trigger callback - * function - */ - if (NULL != trigs[i]->cbfunc) { - trigs[i]->action = trigs[i]->action | - ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME; - } else { - trigs[i]->action = trigs[i]->action & - ~ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME; - } - trig->callback = trigs[i]->cbfunc; - trig->user_tag = trigs[i]->user_tag; - if (0 > orte_pointer_array_add(&trig->index, orte_gpr_replica_globals.local_triggers, trig)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - trig->id = orte_gpr_replica_globals.num_local_trigs; - (orte_gpr_replica_globals.num_local_trigs)++; -MOVEON: - trigs[i]->id = trig->id; - - } - - return ORTE_SUCCESS; -} - -int orte_gpr_replica_remove_local_subscription(orte_gpr_replica_local_subscriber_t *sub) -{ - orte_std_cntr_t index; - - OPAL_TRACE(2); - - if (NULL == sub) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - index = sub->index; - OBJ_RELEASE(sub); - orte_pointer_array_set_item(orte_gpr_replica_globals.local_subscriptions, index, NULL); - - return ORTE_SUCCESS; -} - -int orte_gpr_replica_remove_local_trigger(orte_gpr_replica_local_trigger_t *trig) -{ - orte_std_cntr_t index; - - OPAL_TRACE(2); - - if (NULL == trig) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - index = trig->index; - OBJ_RELEASE(trig); - 
orte_pointer_array_set_item(orte_gpr_replica_globals.local_triggers, index, NULL); - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_messaging_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_messaging_fn.c deleted file mode 100644 index 774fb4ae34..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_messaging_fn.c +++ /dev/null @@ -1,668 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" - -#include "opal/util/output.h" - -#include "orte/dss/dss.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/ns/ns.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/schema/schema.h" - -#include "orte/mca/gpr/base/base.h" -#include "orte/mca/gpr/replica/api_layer/gpr_replica_api.h" -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" -#include "gpr_replica_fn.h" - -static int orte_gpr_replica_get_callback_data(orte_gpr_value_t ***values, orte_std_cntr_t *num_vals, - orte_gpr_replica_subscription_t *sub); - -static int orte_gpr_replica_store_value_in_trigger_msg(orte_gpr_replica_subscription_t *sub, - orte_gpr_notify_message_t *msg, - orte_std_cntr_t cnt, - orte_gpr_value_t **values); - -int orte_gpr_replica_process_callbacks(void) -{ - 
orte_gpr_replica_callbacks_t *cb; - orte_gpr_replica_trigger_t **trigs; - orte_gpr_replica_subscription_t **subs; - orte_gpr_replica_requestor_t **reqs; - orte_std_cntr_t i, j, k, m; - int rc; - - /* check and set flag indicating callbacks being processed */ - if (orte_gpr_replica.processing_callbacks) { - return ORTE_SUCCESS; - } - orte_gpr_replica.processing_callbacks = true; - - while (NULL != (cb = (orte_gpr_replica_callbacks_t*)opal_list_remove_last(&orte_gpr_replica.callbacks))) { - /* each callback corresponds to a specific requestor - * The message in the callback consists of at least one (and can - * be more) "datagrams" intended for that requestor, each of which - * is slated to be returned to a specific function on the requestor. - */ - if (NULL == cb->requestor) { /* local callback */ - /* Since this requestor is "local", we simply execute - * the callbacks ourself. - */ - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - if (ORTE_SUCCESS != (rc = orte_gpr_replica_deliver_notify_msg(cb->message))) { - ORTE_ERROR_LOG(rc); - } - OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); - } else { /* remote request - send messages back */ - orte_gpr_replica_remote_notify(cb->requestor, cb->message); - } - - OBJ_RELEASE(cb); - } - - /* cleanup any one-shot triggers that fired and set processing to - * false on all others - */ - trigs = (orte_gpr_replica_trigger_t**)((orte_gpr_replica.triggers)->addr); - for (i=0, k=0, m=0; k < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trigs[i]) { - k++; - if (trigs[i]->one_shot_fired) { - OBJ_RELEASE(trigs[i]); - orte_pointer_array_set_item(orte_gpr_replica.triggers, i, NULL); - m++; - } else { - trigs[i]->processing = false; - } - } - } - orte_gpr_replica.num_trigs -= m; - - /* cleanup any subscriptions that are supposed to be - * removed based on a trigger having fired - set processing to false - * on all others - */ - subs = 
(orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr; - for (i=0, k=0; k < orte_gpr_replica.num_subs && - i < (orte_gpr_replica.subscriptions)->size; i++) { - if (NULL != subs[i]) { - k++; - if (subs[i]->cleanup) { - reqs = (orte_gpr_replica_requestor_t**)(subs[i]->requestors)->addr; - for (j=0, m=0; NULL != subs[i] && - m < subs[i]->num_requestors && - j < (subs[i]->requestors)->size; j++) { - if (NULL != reqs[j]) { - m++; - if (ORTE_SUCCESS != (rc = - orte_gpr_replica_remove_subscription(reqs[j]->requestor, reqs[j]->idtag))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } else { - subs[i]->processing = false; - } - } - } - - /* all callbacks processed - indicate list is open */ - orte_gpr_replica.processing_callbacks = false; - - return ORTE_SUCCESS; -} - - - -int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub, - orte_gpr_value_t *value) -{ - orte_gpr_replica_callbacks_t *cb; - orte_gpr_replica_requestor_t **reqs; - orte_gpr_value_t **values; - orte_std_cntr_t cnt; - orte_std_cntr_t i, j; - bool cleanup_reqd; - int rc=ORTE_SUCCESS; - - /* The data to be returned will be the same for all requestors - * on this subscription. First, let's get the data (if it hasn't - * already been provided) so we have it ready to be added to - * the callback - */ - if (NULL != value) { /* no need to get data - already provided */ - values = &value; - cnt = 1; - cleanup_reqd = false; - } else { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_callback_data(&values, &cnt, sub))) { - ORTE_ERROR_LOG(rc); - return rc; - } - cleanup_reqd = true; - } - - /* We now have the data to be sent to each requestor attached - * to this subscription. - * Each subscription that was placed on the system has an associated - * structure containing the process name and array of callback info where - * data is to be returned. 
For remote processes, the callback - * info is omitted and a subscription id is recorded - this tells - * the remote process which callback function to use when it receives - * a message from us. - * Each subscription can have multiple "requestors" attached to it, - * each "requestor" consisting of the process name and - * subscription id (for remote processes), and callback info (for local - * processes). - * For each requestor, we need to check to see if a callback has - * already been scheduled to that destination - if so, we piggyback - * another datagram onto it to minimize communication costs. - */ - - /* this data is intended to be sent to the individual - * subscribers themselves. Cycle through the subscription's - * requestors, define callbacks to them appropriately, - * and set the id to indicate that it does NOT go - * to a trigger - */ - reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr; - for (i=0, j=0; j < sub->num_requestors && - i < (sub->requestors)->size; i++) { - if (NULL != reqs[i]) { - j++; - /* define the callback */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(ORTE_GPR_SUBSCRIPTION_MSG, - &cb, reqs[i]->requestor))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - /* set the callback id to indicate not a trigger callback */ - (cb->message)->id = ORTE_GPR_TRIGGER_ID_MAX; - /* okay, now we have a message going to the requestor. 
We need to - * store the values in the notify_data structure corresponding to this - * subscription id, combining data where the id's match - */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i], - cb->message, sub->name, cnt, values))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - } /* for i */ - -CLEANUP: - /* release the values here - the value objects have been "retained" in - * the store_value function, so this just ensures that they will be - * released after the last datagram lets go of them - */ - for (i=0; i < cnt; i++) OBJ_RELEASE(values[i]); - /* release the values array IF and only IF it was malloc'd here. - * otherwise, the value is coming in from the outside - when that happens, - * only a single value is passed in, so there is no array to free - */ - if (cleanup_reqd && NULL != values) free(values); - - return rc; -} - - -int orte_gpr_replica_register_trigger_callback(orte_gpr_replica_trigger_t *trig) -{ - orte_gpr_replica_callbacks_t *cb; - orte_gpr_replica_counter_t **cntr; - orte_gpr_replica_subscription_t **subs; - orte_gpr_value_t **values, *value; - orte_std_cntr_t i, j, k, cnt; - int rc; - - /* set the callback's message - * to point at the correct trigger id for that requestor - * so the message goes to the correct place, and go ahead - * and store the data in the message - */ - /* define the callback */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(ORTE_GPR_TRIGGER_MSG, - &cb, (trig->master)->requestor))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* transfer the trigger name, if available */ - if (NULL != trig->name) { - (cb->message)->target = strdup(trig->name); - } - /* set the callback id to point to the trigger callback function */ - (cb->message)->id = (trig->master)->idtag; - - /* if the trigger counters are to be included, do so */ - if (ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS & trig->action) { - cntr = (orte_gpr_replica_counter_t**)((trig->counters)->addr); - for (i=0, j=0; j < trig->num_counters 
&& - i < (trig->counters)->size; i++) { - if (NULL != cntr[i]) { - j++; - value = OBJ_NEW(orte_gpr_value_t); - if (NULL == value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - value->segment = strdup(cntr[i]->seg->name); - value->cnt = 1; - value->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*)); - if (NULL == value->keyvals) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - value->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t); - if (NULL == value->keyvals[0]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup( - &(value->keyvals[0]->key), cntr[i]->seg, - cntr[i]->iptr->itag))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - value->keyvals[0]->value = OBJ_NEW(orte_data_value_t); - if (NULL == value->keyvals[0]->value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - value->keyvals[0]->value->type = cntr[i]->iptr->value->type; - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((value->keyvals[0]->value)->data), cntr[i]->iptr->value->data, cntr[i]->iptr->value->type))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - /* - * store the data in the message - */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_trigger_msg(NULL, - cb->message, 1, &value))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* release the storage */ - OBJ_RELEASE(value); - } - } - } - - /* cycle through all the trigger's subscriptions and place - * that data on the message - */ - subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr; - for (i=0, j=0; j < trig->num_subscriptions && - i < (trig->subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - if (NULL != subs[i]->name) { - /* if it's a named subscription, we will deliver it via the - * trigger callback function. 
The data to be returned will - * be the same for all requestors. - */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_callback_data(&values, &cnt, subs[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* - * store the data in the message - */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_trigger_msg(subs[i], - cb->message, cnt, values))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* release the storage */ - for (k=0; k < cnt; k++) OBJ_RELEASE(values[k]); - if (NULL != values) free(values); - } else { - /* in the case of a non-named subscription, we know that someone - * has attached a subscription to this trigger, and that the - * requestor needs the data to be returned directly to them. This - * occurs in the case of orterun, which attaches subscriptions to - * the standard triggers so it can monitor the progress of a job - * it has launched. To facilitate this, we register a separate - * callback for this subscription - */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(subs[i], NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_define_callback(orte_gpr_notify_msg_type_t msg_type, - orte_gpr_replica_callbacks_t **cbptr, - orte_process_name_t *recipient) -{ - orte_gpr_replica_callbacks_t *cb; - int rc; - - /* see if a callback has already been registered for this recipient */ - for (cb = (orte_gpr_replica_callbacks_t*)opal_list_get_first(&(orte_gpr_replica.callbacks)); - cb != (orte_gpr_replica_callbacks_t*)opal_list_get_end(&(orte_gpr_replica.callbacks)); - cb = (orte_gpr_replica_callbacks_t*)opal_list_get_next(cb)) { - /* must check to see if both the recipient is the same AND that the - * message type being sent is identical (i.e., that messages going back - * to trigger callbacks do NOT get mixed with messages going back to - * subscription callbacks). 
This is critical as the deliver_notify_msg - * functions handle these message types in different ways - */ - if (((NULL == recipient && NULL == cb->requestor) && - (msg_type == cb->message->msg_type)) || - (((NULL != recipient && NULL != cb->requestor) && - (ORTE_EQUAL == orte_dss.compare(recipient, cb->requestor, ORTE_NAME))) && - (msg_type == cb->message->msg_type))) { - /* okay, a callback has been registered to send data to this - * recipient - return this location - */ - *cbptr = cb; - return ORTE_SUCCESS; - } - } - - /* this is going to somebody new - create a new callback - * for this recipient - */ - cb = OBJ_NEW(orte_gpr_replica_callbacks_t); - if (NULL == cb) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - opal_list_append(&orte_gpr_replica.callbacks, &cb->item); - - /* construct the message */ - cb->message = OBJ_NEW(orte_gpr_notify_message_t); - if (NULL == cb->message) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - cb->message->msg_type = msg_type; - - if (NULL == recipient) { - cb->requestor = NULL; - } else { - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(cb->requestor), recipient, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* return the pointer to the new callback */ - *cbptr = cb; - - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_store_value_in_msg(orte_gpr_replica_requestor_t *req, - orte_gpr_notify_message_t *msg, - char *sub_name, - orte_std_cntr_t cnt, - orte_gpr_value_t **values) -{ - orte_std_cntr_t i, j, k, index; - orte_gpr_notify_data_t **data, *dptr; - - /* check to see if this data is going to the same place as - * any prior data on the message. if so, then we add the values - * to that existing data structure. 
if not, then we realloc to - * establish a new data structure and store the data there - */ - data = (orte_gpr_notify_data_t**)(msg->data)->addr; - for (i=0, k=0; k < msg->cnt && - i < (msg->data)->size; i++) { - if (NULL != data[i]) { - k++; - if (data[i]->id == req->idtag) { /* going to the same place */ - for (j=0; j < cnt; j++) { - if (0 > orte_pointer_array_add(&index, data[i]->values, values[j])) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* must "retain" the value object to ensure that it is - * there for this datagram. Since we are only storing - * pointers to the object (and not actually copying it), - * datagrams may wind up sharing the object. Hence, when - * a datagram is released, it will release the object. Without - * the retain, the next datagram that shares that object - * will see trash - */ - OBJ_RETAIN(values[j]); - } - data[i]->cnt += cnt; - return ORTE_SUCCESS; - } - } - } - - /* no prior matching data found, so add another data location to - * the message and store the values there - */ - dptr = OBJ_NEW(orte_gpr_notify_data_t); - if (NULL == dptr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* set the name of the subscription, if provided */ - if (NULL != sub_name) { - dptr->target = strdup(sub_name); - } - dptr->id = req->idtag; - if (0 > orte_pointer_array_add(&index, msg->data, dptr)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (msg->cnt)++; - for (j=0; j < cnt; j++) { - if (0 > orte_pointer_array_add(&index, dptr->values, values[j])) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* must "retain" the value object to ensure that it is - * there for this datagram. Since we are only storing - * pointers to the object (and not actually copying it), - * datagrams may wind up sharing the object. Hence, when - * a datagram is released, it will release the object. 
Without - * the retain, the next datagram that shares that object - * will see trash - */ - OBJ_RETAIN(values[j]); - } - dptr->cnt = cnt; - return ORTE_SUCCESS; -} - -static int orte_gpr_replica_store_value_in_trigger_msg(orte_gpr_replica_subscription_t *sub, - orte_gpr_notify_message_t *msg, - orte_std_cntr_t cnt, - orte_gpr_value_t **values) -{ - orte_std_cntr_t i, j, k, index; - orte_gpr_notify_data_t **data, *dptr; - - /* check to see if this data is going to the same place as - * any prior data on the message. if so, then we add the values - * to that existing data structure. if not, then we realloc to - * establish a new data structure and store the data there - */ - data = (orte_gpr_notify_data_t**)(msg->data)->addr; - for (i=0, k=0; k < msg->cnt && - i < (msg->data)->size; i++) { - if (NULL != data[i]) { - k++; - if ((NULL == data[i]->target && NULL == sub) || - (NULL != data[i]->target && NULL != sub->name && - 0 == strcmp(data[i]->target, sub->name))) { /* going to the same place */ - for (j=0; j < cnt; j++) { - if (0 > orte_pointer_array_add(&index, data[i]->values, values[j])) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* must "retain" the value object to ensure that it is - * there for this datagram. Since we are only storing - * pointers to the object (and not actually copying it), - * datagrams may wind up sharing the object. Hence, when - * a datagram is released, it will release the object. 
Without - * the retain, the next datagram that shares that object - * will see trash - */ - OBJ_RETAIN(values[j]); - } - data[i]->cnt += cnt; - return ORTE_SUCCESS; - } - } - } - - /* no prior matching data found, so add another data location to - * the message and store the values there - */ - dptr = OBJ_NEW(orte_gpr_notify_data_t); - if (NULL == dptr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (NULL != sub && NULL != sub->name) { - dptr->target = strdup(sub->name); - } - if (0 > orte_pointer_array_add(&index, msg->data, dptr)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (msg->cnt)++; - for (j=0; j < cnt; j++) { - if (0 > orte_pointer_array_add(&index, dptr->values, values[j])) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* must "retain" the value object to ensure that it is - * there for this datagram. Since we are only storing - * pointers to the object (and not actually copying it), - * datagrams may wind up sharing the object. Hence, when - * a datagram is released, it will release the object. Without - * the retain, the next datagram that shares that object - * will see trash - */ - OBJ_RETAIN(values[j]); - } - dptr->cnt = cnt; - return ORTE_SUCCESS; -} - -static int orte_gpr_replica_get_callback_data(orte_gpr_value_t ***ret_values, orte_std_cntr_t *cnt, - orte_gpr_replica_subscription_t *sub) -{ - orte_gpr_value_t **vals, **values; - orte_gpr_replica_ivalue_t **ivals; - orte_std_cntr_t i, j, k, num_tokens, num_keys, interim, count; - int rc; - - /* setup default error returns */ - *ret_values = NULL; - *cnt = 0; - - /* get the data off the registry. 
since a - * subscription can have multiple data sources specified, we - * have to loop through those sources, constructing an aggregated - * array of data values that we can work with in composing the - * final message - */ - ivals = (orte_gpr_replica_ivalue_t**)(sub->values)->addr; - count = 0; - values = NULL; - for (i=0, j=0; j < sub->num_values && - i < (sub->values)->size; i++) { - if (NULL != ivals[i]) { - j++; - num_tokens = orte_value_array_get_size(&(ivals[i]->tokentags)); - num_keys = orte_value_array_get_size(&(ivals[i]->keytags)); - /* get the data for this description off the registry */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_fn(ivals[i]->addr_mode, - ivals[i]->seg, - ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->tokentags), orte_gpr_replica_itag_t), - num_tokens, - ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->keytags), orte_gpr_replica_itag_t), - num_keys, - &interim, &vals))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* if we don't get any data back, just continue - don't - * try to add it to the values since that would cause a - * zero-byte malloc - */ - if (0 == interim) { - continue; - } - /* add these results to those we have already obtained */ - if (0 == count) { /* first time through */ - values = (orte_gpr_value_t**)malloc(interim * - sizeof(orte_gpr_value_t*)); - if (NULL == values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } else { - /* reallocate values array */ - values = (orte_gpr_value_t**)realloc(values, - (count+interim)*sizeof(orte_gpr_value_t*)); - if (NULL == values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - /* add data to end of array */ - for (k=0; k < interim; k++) { - values[k+count] = vals[k]; - } - /* release the array of pointers - the pointers themselves - * will remain "alive" in the values array to be released - * later - */ - free(vals); - /* update the count */ - count += interim; - } - } - *ret_values = values; - *cnt = count; - 
return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_put_get_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_put_get_fn.c deleted file mode 100644 index abe617b80f..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_put_get_fn.c +++ /dev/null @@ -1,742 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/util/proc_info.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" -#include "orte/mca/gpr/replica/api_layer/gpr_replica_api.h" - -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - -/* - * Local typedef for storing a list of itagvals - * - used exclusively by "get" routines - */ -typedef struct { - opal_list_item_t item; /* required for this to be on list */ - orte_gpr_replica_itag_t itag; /* itag for this value's key */ - orte_data_value_t value; -} orte_gpr_replica_ival_list_t; - -/* constructor */ -static void orte_gpr_replica_ival_list_constructor(orte_gpr_replica_ival_list_t* ptr) -{ - ptr->itag = 0; - OBJ_CONSTRUCT(&(ptr->value), orte_data_value_t); -} - -/* destructor - used to free any resources held by instance */ -static void 
orte_gpr_replica_ival_list_destructor(orte_gpr_replica_ival_list_t* ptr) -{ - OBJ_DESTRUCT(&(ptr->value)); -} - -/* define instance of ival_list_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_ival_list_t, /* type name */ - opal_list_item_t, /* parent "class" name */ - orte_gpr_replica_ival_list_constructor, /* constructor */ - orte_gpr_replica_ival_list_destructor); /* destructor */ - -/* - * Local typedef for storing a list of containers - * - used exclusively by "get" routines - */ -typedef struct { - opal_list_item_t item; /* required for this to be on list */ - orte_gpr_replica_container_t *cptr; /* pointer to the container */ - opal_list_t *ival_list; /* list of ival_list_t of values found by get */ -} orte_gpr_replica_get_list_t; - -/* constructor */ -static void orte_gpr_replica_get_list_constructor(orte_gpr_replica_get_list_t* ptr) -{ - ptr->cptr = NULL; - ptr->ival_list = OBJ_NEW(opal_list_t); -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_get_list_destructor(orte_gpr_replica_get_list_t* ptr) -{ - orte_gpr_replica_ival_list_t *iptr; - - while (NULL != (iptr = (orte_gpr_replica_ival_list_t*)opal_list_remove_first(ptr->ival_list))) { - OBJ_RELEASE(iptr); - } - OBJ_RELEASE(ptr->ival_list); - -} - -/* define instance of get_list_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_get_list_t, /* type name */ - opal_list_item_t, /* parent "class" name */ - orte_gpr_replica_get_list_constructor, /* constructor */ - orte_gpr_replica_get_list_destructor); /* destructor */ - - - -/* - * FUNCTIONS - */ - -int orte_gpr_replica_put_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *token_itags, orte_std_cntr_t num_tokens, - orte_std_cntr_t cnt, orte_gpr_keyval_t **keyvals) -{ - orte_gpr_replica_container_t **cptr, *cptr2; - orte_gpr_replica_itag_t itag; - orte_gpr_replica_addr_mode_t tok_mode; - orte_gpr_replica_itagval_t *iptr, **iptrs; - bool overwrite, duplicate, overwritten; - 
int rc; - orte_std_cntr_t i, j, k, m, n, index; - - OPAL_TRACE(2); - - if (orte_gpr_replica_globals.debug) { - char *tmp; - - opal_output(0, "%s gpr_replica_put: entered on segment %s\nValues:", - ORTE_NAME_PRINT(orte_process_info.my_name), seg->name); - for (i=0; i < cnt; i++) { - opal_output(0, "\tKey: %s", keyvals[i]->key); - } - opal_output(0, "Tokens:"); - for (i=0; i < num_tokens; i++) { - orte_gpr_replica_dict_reverse_lookup(&tmp, seg, token_itags[i]); - opal_output(0, "\t%s", tmp); - free(tmp); - } - } - - /* initialize storage for actions taken */ - orte_pointer_array_clear(orte_gpr_replica_globals.acted_upon); - orte_gpr_replica_globals.num_acted_upon = 0; - orte_pointer_array_clear(orte_gpr_replica_globals.overwritten); - orte_gpr_replica_globals.num_overwritten = 0; - - /* extract the token address mode and overwrite permissions */ - overwrite = false; - duplicate = true; - if (addr_mode & ORTE_GPR_OVERWRITE) { - overwrite = true; - } else if (addr_mode & ORTE_GPR_NO_DUPLICATE) { - duplicate = false; - } - - tok_mode = ORTE_GPR_REPLICA_TOKMODE(addr_mode); - if (0x00 == tok_mode) { /* default tokens addressing mode to AND */ - tok_mode = ORTE_GPR_REPLICA_AND; - } - - /* find the specified container(s) */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_containers(seg, tok_mode, - token_itags, num_tokens))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (NULL == token_itags && 0 == orte_gpr_replica_globals.num_srch_cptr) { /* wildcard tokens but nothing found */ - /* no ERROR_LOG entry created as this is not a system failure */ - return ORTE_ERR_NOT_FOUND; - } - - if (0 == orte_gpr_replica_globals.num_srch_cptr) { /* existing container not found - create one */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_container(&cptr2, seg, - num_tokens, token_itags))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* ok, store all the keyvals in the container */ - for (i=0; i < cnt; i++) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_add_keyval(&iptr, seg, 
cptr2, keyvals[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* record that we did this */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_record_action(seg, cptr2, iptr, ORTE_GPR_REPLICA_ENTRY_ADDED))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - } else { /* otherwise, go through list of containers. For each one, - see if entry already exists in container - overwrite if allowed */ - cptr = (orte_gpr_replica_container_t**)(orte_gpr_replica_globals.srch_cptr)->addr; - iptrs = (orte_gpr_replica_itagval_t**)(orte_gpr_replica_globals.overwritten)->addr; - for (j=0, k=0; k < orte_gpr_replica_globals.num_srch_cptr && - j < (orte_gpr_replica_globals.srch_cptr)->size; j++) { - if (NULL != cptr[j]) { - k++; - for (i=0; i < cnt; i++) { /* for each provided keyval */ - if (ORTE_SUCCESS == orte_gpr_replica_create_itag(&itag, seg, keyvals[i]->key) && - ORTE_SUCCESS == orte_gpr_replica_search_container( - ORTE_GPR_REPLICA_OR, - &itag, 1, cptr[j])) { - if (0 < orte_gpr_replica_globals.num_srch_ival) { - /* this key already exists - overwrite, if permission given */ - if (overwrite) { - /* check to see if we have already overwritten this keyval. if so, - * then we add the remaining values - otherwise, only the - * last value provided would be retained! 
- */ - overwritten = false; - for (m=0, n=0; !overwritten && - n < orte_gpr_replica_globals.num_overwritten && - m < (orte_gpr_replica_globals.overwritten)->size; m++) { - if (NULL != iptrs[m]) { - n++; - if (iptrs[m]->itag == itag) { - /* keyval was previously overwritten so just add this one as another entry */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_add_keyval(&iptr, seg, cptr[j], keyvals[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* record that we did this */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_record_action(seg, cptr[j], iptr, ORTE_GPR_REPLICA_ENTRY_CHANGED))) { - ORTE_ERROR_LOG(rc); - return rc; - } - overwritten = true; - } - } - } - if (!overwritten) { - /* must not have been previously overwritten - go - * ahead and overwrite it now - */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_update_keyval(&iptr, seg, cptr[j], keyvals[i]))) { - return rc; - } - /* record the ival so we don't do it again */ - if (0 > orte_pointer_array_add(&index, orte_gpr_replica_globals.overwritten, (void*)iptr)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (orte_gpr_replica_globals.num_overwritten)++; - } - } else if (duplicate) { - /* no overwrite permission - add this keyval to the container as a new entry - * if we are allowed to duplicate. Otherwise, we just ignore it. 
- */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_add_keyval(&iptr, seg, cptr[j], keyvals[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* record that we did this */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_record_action(seg, cptr[j], iptr, ORTE_GPR_REPLICA_ENTRY_CHANGED))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } else { /* new key - add to container */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_add_keyval(&iptr, seg, cptr[j], keyvals[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* record that we did this */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_record_action(seg, cptr[j], iptr, ORTE_GPR_REPLICA_ENTRY_ADDED))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - } - } - } - } - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr_replica_put: complete", ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_put_nb_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *token_itags, orte_std_cntr_t num_tokens, - orte_std_cntr_t cnt, orte_gpr_keyval_t **keyvals, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(2); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - - -int orte_gpr_replica_get_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *tokentags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *keytags, orte_std_cntr_t num_keys, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values) -{ - opal_list_t get_list; - orte_gpr_replica_get_list_t *gptr; - orte_gpr_replica_ival_list_t *ival_list; - orte_gpr_replica_container_t **cptr, *cptr2; - orte_gpr_replica_itagval_t **iptr; - orte_gpr_keyval_t **kptr; - orte_gpr_replica_addr_mode_t tokmode, keymode; - int rc; - orte_std_cntr_t i, j, k, m; - bool stripped; - - OPAL_TRACE(2); - - if (orte_gpr_replica_globals.debug) { - char *token; - opal_output(0, "%s gpr_replica_get: entered", - ORTE_NAME_PRINT(orte_process_info.my_name)); - 
opal_output(0, "\tGetting data from segment %s with %d tokens and %d keys", - seg->name, num_tokens, num_keys); - for (i=0; i < num_tokens; i++) { - if (ORTE_SUCCESS != orte_gpr_replica_dict_reverse_lookup( - &token, seg, tokentags[i])) { - opal_output(0, "\t\ttoken num %d: No entry found for itag %X", - i, tokentags[i]); - } else { - opal_output(0, "\t\ttoken num %d: itag %d\tToken: %s", - i, tokentags[i], token); - free(token); - } - } - for (i=0; i < num_keys; i++) { - if (ORTE_SUCCESS != orte_gpr_replica_dict_reverse_lookup( - &token, seg, keytags[i])) { - opal_output(0, "\t\tkey num %d: No entry found for itag %X", - i, keytags[i]); - } else { - opal_output(0, "\t\tkey num %d: itag %d\tKey: %s", - i, keytags[i], token); - free(token); - } - } - - } - - /* initialize the list of findings */ - OBJ_CONSTRUCT(&get_list, opal_list_t); - *cnt = 0; - *values = NULL; - - tokmode = ORTE_GPR_REPLICA_TOKMODE(addr_mode); - if (0x00 == tokmode) { /* default token addressing mode to AND */ - tokmode = ORTE_GPR_REPLICA_AND; - } - keymode = ORTE_GPR_REPLICA_KEYMODE(addr_mode); - if (0x00 == keymode) { /* default key addressing mode to OR */ - keymode = ORTE_GPR_REPLICA_OR; - } - - /* set the stripped flag - do they want descriptive info in result or not */ - if (ORTE_GPR_REPLICA_STRIPPED(addr_mode)) { - stripped = true; - } else { - stripped = false; - } - - /* find all containers that meet search criteria for tokens */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_containers(seg, tokmode, - tokentags, num_tokens))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&get_list); - return rc; - } - - /* if nothing found, then can return */ - if (0 == orte_gpr_replica_globals.num_srch_cptr) { - OBJ_DESTRUCT(&get_list); - return ORTE_SUCCESS; - } - - /* for each container that was found, search it to find all matching keytags - * subject to specified mode. 
Collect the results on get_list - */ - cptr = (orte_gpr_replica_container_t**)((orte_gpr_replica_globals.srch_cptr)->addr); - for (i=0, k=0; k < orte_gpr_replica_globals.num_srch_cptr && - i < (orte_gpr_replica_globals.srch_cptr)->size; i++) { - if (NULL != cptr[i]) { - k++; - if (ORTE_SUCCESS == orte_gpr_replica_search_container(keymode, - keytags, num_keys, cptr[i]) && - 0 < orte_gpr_replica_globals.num_srch_ival) { - gptr = OBJ_NEW(orte_gpr_replica_get_list_t); - gptr->cptr = cptr[i]; - iptr = (orte_gpr_replica_itagval_t**)((orte_gpr_replica_globals.srch_ival)->addr); - for (j=0, m=0; m < orte_gpr_replica_globals.num_srch_ival && - j < (orte_gpr_replica_globals.srch_ival)->size; j++) { - if (NULL != iptr[j]) { - m++; - ival_list = OBJ_NEW(orte_gpr_replica_ival_list_t); - if (NULL == ival_list) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - ival_list->itag = iptr[j]->itag; - ival_list->value.type = iptr[j]->value->type; - /* it is okay for the data to be NULL as we may not have stored a value yet - * or we may be dealing with an UNDEF type - */ - if (NULL != iptr[j]->value->data) { - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((ival_list->value).data), iptr[j]->value->data, iptr[j]->value->type))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ival_list); - return rc; - } - } - opal_list_append(gptr->ival_list, &ival_list->item); - } - } - opal_list_append(&get_list, &gptr->item); - (*cnt)++; /* update number of containers that had something found */ - } - } - } - - if (0 == *cnt) { /* nothing found - report that */ - rc = ORTE_SUCCESS; - goto CLEANUP; - } - - /* if something found, convert it to array of values */ - *values = (orte_gpr_value_t**)malloc((*cnt) * sizeof(orte_gpr_value_t*)); - if (NULL == *values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - for (i=0; i < *cnt; i++) { - gptr = (orte_gpr_replica_get_list_t*)opal_list_remove_first(&get_list); - if (NULL 
== gptr) { - ORTE_ERROR_LOG(ORTE_ERROR); - rc = ORTE_ERROR; - goto CLEANUP; - } - if (stripped) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&((*values)[i]), addr_mode, NULL, - (orte_std_cntr_t)opal_list_get_size(gptr->ival_list), - 0))) { - ORTE_ERROR_LOG(rc); - *cnt = 0; - goto CLEANUP; - } - } else { - cptr2 = gptr->cptr; - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&((*values)[i]), addr_mode, seg->name, - (orte_std_cntr_t)opal_list_get_size(gptr->ival_list), - cptr2->num_itags))) { - ORTE_ERROR_LOG(rc); - *cnt = 0; - goto CLEANUP; - } - for (j=0; j < cptr2->num_itags; j++) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup(&((*values)[i]->tokens[j]), seg, cptr2->itags[j]))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - } - kptr = (*values)[i]->keyvals; - for (j=0; j < (*values)[i]->cnt; j++) { - ival_list = (orte_gpr_replica_ival_list_t*)opal_list_remove_first(gptr->ival_list); - if (NULL == ival_list) { - ORTE_ERROR_LOG(ORTE_ERROR); - rc = ORTE_ERROR; - goto CLEANUP; - } - kptr[j] = OBJ_NEW(orte_gpr_keyval_t); - if (NULL == kptr[j]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup( - &(kptr[j]->key), seg, ival_list->itag))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - kptr[j]->value = OBJ_NEW(orte_data_value_t); - if (NULL == kptr[j]->value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - kptr[j]->value->type = ival_list->value.type; - /* okay to have NULL data */ - if (NULL != ival_list->value.data) { - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((kptr[j]->value)->data), ival_list->value.data, ival_list->value.type))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - OBJ_RELEASE(ival_list); - } - OBJ_RELEASE(gptr); - } - -CLEANUP: - - while (NULL != (gptr = (orte_gpr_replica_get_list_t*)opal_list_remove_first(&get_list))) { - OBJ_RELEASE(gptr); - } - 
OBJ_DESTRUCT(&get_list); - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr_replica_get: finished search", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - return rc; -} - - -int orte_gpr_replica_get_conditional_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *tokentags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *keytags, orte_std_cntr_t num_keys, - orte_std_cntr_t num_conditions, orte_gpr_replica_itagval_t **conditions, - orte_std_cntr_t *cnt, orte_gpr_value_t ***values) -{ - opal_list_t get_list; - orte_gpr_replica_get_list_t *gptr; - orte_gpr_replica_ival_list_t *ival_list; - orte_gpr_replica_container_t **cptr, *cptr2; - orte_gpr_replica_itagval_t **iptr; - orte_gpr_keyval_t **kptr; - orte_gpr_replica_addr_mode_t tokmode, keymode; - int rc; - orte_std_cntr_t i, j, k, m, n; - bool stripped; - - OPAL_TRACE(2); - - /* initialize the list of findings */ - OBJ_CONSTRUCT(&get_list, opal_list_t); - *cnt = 0; - *values = NULL; - - tokmode = ORTE_GPR_REPLICA_TOKMODE(addr_mode); - if (0x00 == tokmode) { /* default token addressing mode to AND */ - tokmode = ORTE_GPR_REPLICA_AND; - } - keymode = ORTE_GPR_REPLICA_KEYMODE(addr_mode); - if (0x00 == keymode) { /* default key addressing mode to OR */ - keymode = ORTE_GPR_REPLICA_OR; - } - if (ORTE_GPR_REPLICA_STRIPPED(addr_mode)) { - stripped = true; - } else { - stripped = false; - } - - /* find all containers that meet search criteria for tokens */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_containers(seg, tokmode, - tokentags, num_tokens))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&get_list); - return rc; - } - - /* if nothing found, then can return */ - if (0 == orte_gpr_replica_globals.num_srch_cptr) { - OBJ_DESTRUCT(&get_list); - return ORTE_SUCCESS; - } - - /* for each container that was found, search it to find all matching keytags - * subject to specified mode and conditions. 
Collect the results on get_list - */ - cptr = (orte_gpr_replica_container_t**)((orte_gpr_replica_globals.srch_cptr)->addr); - for (i=0, k=0; k < orte_gpr_replica_globals.num_srch_cptr && - i < (orte_gpr_replica_globals.srch_cptr)->size; i++) { - if (NULL != cptr[i]) { - /* see if the conditions are met within this container */ - for (n=0; n < num_conditions; n++) { - if (!orte_gpr_replica_value_in_container(cptr[i], conditions[n])) { /* condition not met */ - goto MOVEON; - } - } - /* all conditions must have been met - check for requested keys & return them */ - if (ORTE_SUCCESS == orte_gpr_replica_search_container(keymode, - keytags, num_keys, cptr[i]) && - 0 < orte_gpr_replica_globals.num_srch_ival) { - gptr = OBJ_NEW(orte_gpr_replica_get_list_t); - gptr->cptr = cptr[i]; - iptr = (orte_gpr_replica_itagval_t**)((orte_gpr_replica_globals.srch_ival)->addr); - for (j=0, m=0; m < orte_gpr_replica_globals.num_srch_ival && - j < (orte_gpr_replica_globals.srch_ival)->size; j++) { - if (NULL != iptr[j]) { - m++; - ival_list = OBJ_NEW(orte_gpr_replica_ival_list_t); - if (NULL == ival_list) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - ival_list->itag = iptr[j]->itag; - ival_list->value.type = iptr[j]->value->type; - /* it is okay to have NULL data */ - if (NULL != iptr[j]->value->data) { - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((ival_list->value).data), iptr[j]->value->data, iptr[j]->value->type))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ival_list); - return rc; - } - } - opal_list_append(gptr->ival_list, &ival_list->item); - } - } - opal_list_append(&get_list, &gptr->item); - (*cnt)++; /* update number of containers that had something found */ - } -MOVEON: - k++; - } - } - - if (0 == *cnt) { /* nothing found - report that */ - rc = ORTE_SUCCESS; - goto CLEANUP; - } - - /* if something found, convert it to array of values */ - *values = (orte_gpr_value_t**)malloc((*cnt) * sizeof(orte_gpr_value_t*)); - if (NULL == 
*values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - for (i=0; i < *cnt; i++) { - gptr = (orte_gpr_replica_get_list_t*)opal_list_remove_first(&get_list); - if (NULL == gptr) { - ORTE_ERROR_LOG(ORTE_ERROR); - rc = ORTE_ERROR; - goto CLEANUP; - } - if (stripped) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&((*values)[i]), addr_mode, NULL, - (orte_std_cntr_t)opal_list_get_size(gptr->ival_list), - 0))) { - ORTE_ERROR_LOG(rc); - *cnt = 0; - goto CLEANUP; - } - } else { - cptr2 = gptr->cptr; - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&((*values)[i]), addr_mode, seg->name, - (orte_std_cntr_t)opal_list_get_size(gptr->ival_list), - cptr2->num_itags))) { - ORTE_ERROR_LOG(rc); - *cnt = 0; - goto CLEANUP; - } - for (j=0; j < cptr2->num_itags; j++) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup(&((*values)[i]->tokens[j]), seg, cptr2->itags[j]))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - } - kptr = (*values)[i]->keyvals; - for (j=0; j < (*values)[i]->cnt; j++) { - ival_list = (orte_gpr_replica_ival_list_t*)opal_list_remove_first(gptr->ival_list); - if (NULL == ival_list) { - ORTE_ERROR_LOG(ORTE_ERROR); - rc = ORTE_ERROR; - goto CLEANUP; - } - kptr[j] = OBJ_NEW(orte_gpr_keyval_t); - if (NULL == kptr[j]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup( - &(kptr[j]->key), seg, ival_list->itag))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - kptr[j]->value = OBJ_NEW(orte_data_value_t); - if (NULL == kptr[j]->value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - kptr[j]->value->type = ival_list->value.type; - /* okay to have NULL data */ - if (NULL != ival_list->value.data) { - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((kptr[j]->value)->data), ival_list->value.data, ival_list->value.type))) { - 
ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - OBJ_RELEASE(ival_list); - } - OBJ_RELEASE(gptr); - } - -CLEANUP: - - while (NULL != (gptr = (orte_gpr_replica_get_list_t*)opal_list_remove_first(&get_list))) { - OBJ_RELEASE(gptr); - } - OBJ_DESTRUCT(&get_list); - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr_replica_get: finished search", - ORTE_NAME_PRINT(orte_process_info.my_name)); - } - - return rc; -} - - -int orte_gpr_replica_get_nb_fn(orte_gpr_addr_mode_t addr_mode, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t *tokentags, orte_std_cntr_t num_tokens, - orte_gpr_replica_itag_t *keytags, orte_std_cntr_t num_keys, - orte_gpr_notify_cb_fn_t cbfunc, void *user_tag) -{ - OPAL_TRACE(2); - - return ORTE_ERR_NOT_IMPLEMENTED; -} - diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_segment_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_segment_fn.c deleted file mode 100644 index 1a3ab64a56..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_segment_fn.c +++ /dev/null @@ -1,456 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - support functions. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/class/opal_object.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" - -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - - -int orte_gpr_replica_find_containers(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_addr_mode_t addr_mode, - orte_gpr_replica_itag_t *taglist, orte_std_cntr_t num_tags) -{ - orte_gpr_replica_container_t **cptr; - orte_std_cntr_t i, j, index; - - OPAL_TRACE(3); - - /* ensure the search array is clear */ - orte_pointer_array_clear(orte_gpr_replica_globals.srch_cptr); - orte_gpr_replica_globals.num_srch_cptr = 0; - - cptr = (orte_gpr_replica_container_t**)((seg->containers)->addr); - for (i=0, j=0; j < seg->num_containers && - i < (seg->containers)->size; i++) { - if (NULL != cptr[i]) { - j++; - if (orte_gpr_replica_check_itag_list(addr_mode, - num_tags, taglist, - cptr[i]->num_itags, cptr[i]->itags)) { - if (0 > orte_pointer_array_add(&index, orte_gpr_replica_globals.srch_cptr, cptr[i])) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - orte_pointer_array_clear(orte_gpr_replica_globals.srch_cptr); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (orte_gpr_replica_globals.num_srch_cptr)++; - } - } - } - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_create_container(orte_gpr_replica_container_t **cptr, - orte_gpr_replica_segment_t *seg, - orte_std_cntr_t num_itags, - orte_gpr_replica_itag_t *itags) -{ - int rc; - orte_std_cntr_t index; - - OPAL_TRACE(3); - - *cptr = OBJ_NEW(orte_gpr_replica_container_t); - if (NULL == *cptr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != - (rc = orte_gpr_replica_copy_itag_list(&((*cptr)->itags), itags, num_itags))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(*cptr); - return rc; - } - - 
(*cptr)->num_itags = num_itags; - - if (0 > orte_pointer_array_add(&index, seg->containers, (void*)(*cptr))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (seg->num_containers)++; - - (*cptr)->index = index; - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_release_container(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr) -{ - orte_gpr_replica_itagval_t **iptr; - orte_std_cntr_t i; - int rc; - - OPAL_TRACE(3); - - /* delete all the itagvals in the container */ - iptr = (orte_gpr_replica_itagval_t**)((cptr->itagvals)->addr); - for (i=0; i < (cptr->itagvals)->size; i++) { - if (NULL != iptr[i]) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_delete_itagval(seg, cptr, iptr[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - - /* remove container from segment and release it */ - i = cptr->index; - OBJ_RELEASE(cptr); - orte_pointer_array_set_item(seg->containers, i, NULL); - (seg->num_containers)--; - - /* if the segment is now empty of containers, release it too */ - if (0 == seg->num_containers) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_release_segment(&seg))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_add_keyval(orte_gpr_replica_itagval_t **ivalptr, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_keyval_t *kptr) -{ - orte_gpr_replica_itag_t itag; - int rc; - - OPAL_TRACE(3); - - /* protect against dumb errors - caller must at least provide us with a key */ - if (NULL == kptr || NULL == kptr->key) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_itag(&itag, seg, kptr->key))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr_replica_add_itagval(ivalptr, seg, cptr, itag, kptr->value))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int 
orte_gpr_replica_add_itagval(orte_gpr_replica_itagval_t **ivalptr, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itag_t itag, - orte_data_value_t *dval) -{ - orte_gpr_replica_itagval_t *iptr; - int rc; - - iptr = OBJ_NEW(orte_gpr_replica_itagval_t); - if (NULL == iptr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - iptr->itag = itag; - - iptr->value = OBJ_NEW(orte_data_value_t); - if (NULL == iptr->value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(iptr); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* it is perfectly acceptable to give us an itag that doesn't have a value. For - * example, we may want to predefine a location when we setup a trigger, then actually - * put a value in it later. - */ - if (NULL != dval) { - iptr->value->type = dval->type; - if (NULL != dval->data) { - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((iptr->value)->data), dval->data, dval->type))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(iptr); - return rc; - } - } - } - - if (0 > orte_pointer_array_add(&(iptr->index), cptr->itagvals, (void*)iptr)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(iptr); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (cptr->num_itagvals)++; - - if (0 > (rc = orte_value_array_append_item(&(cptr->itaglist), (void*)(&(iptr->itag))))) { - ORTE_ERROR_LOG(rc); - orte_pointer_array_set_item(cptr->itagvals, iptr->index, NULL); - OBJ_RELEASE(iptr); - return rc; - } - - *ivalptr = iptr; - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_delete_itagval(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itagval_t *iptr) -{ - orte_std_cntr_t i; - int rc; - - OPAL_TRACE(3); - - /* record that we are going to do this - * NOTE: it is important that we make the record BEFORE doing the release. 
- * The record_action function will do a RETAIN on the object so it - * doesn't actually get released until we check subscriptions to see - * if someone wanted to be notified if/when this object was released - */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_record_action(seg, cptr, iptr, - ORTE_GPR_REPLICA_ENTRY_DELETED))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* remove the itag value from the container's list */ - for (i=0; i < orte_value_array_get_size(&(cptr->itaglist)); i++) { - if (iptr->itag == ORTE_VALUE_ARRAY_GET_ITEM(&(cptr->itaglist), orte_gpr_replica_itag_t, i)) { - orte_value_array_remove_item(&(cptr->itaglist), i); - goto MOVEON; - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - -MOVEON: - /* release the data storage */ - i = iptr->index; - OBJ_RELEASE(iptr); - - /* remove the entry from the container's itagval array */ - orte_pointer_array_set_item(cptr->itagvals, i, NULL); - (cptr->num_itagvals)--; - - /* NOTE: If the container is now empty, *don't* remove it here - * This is cause improper recursion if called from orte_gpr_replica_release_container - */ - - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_update_keyval(orte_gpr_replica_itagval_t **iptr2, - orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_keyval_t *kptr) -{ - orte_std_cntr_t i, j, k; - int rc; - orte_pointer_array_t *ptr; - orte_gpr_replica_itagval_t *iptr; - - OPAL_TRACE(3); - - ptr = orte_gpr_replica_globals.srch_ival; - - /* record the error value */ - *iptr2 = NULL; - - /* for each item in the search array, delete it */ - for (i=0; i < ptr->size; i++) { - if (NULL != ptr->addr[i]) { - iptr = (orte_gpr_replica_itagval_t*)ptr->addr[i]; - /* release the data storage */ - j = iptr->index; - /* DON'T RECORD THE ACTION - THIS WILL PREVENT US FROM SENDING - * BOTH THE OLD AND THE NEW DATA BACK ON A SUBSCRIPTION - * REQUEST - */ - /* remove the itag value from the container's list */ - for (k=0; k < 
orte_value_array_get_size(&(cptr->itaglist)); k++) { - if (iptr->itag == ORTE_VALUE_ARRAY_GET_ITEM(&(cptr->itaglist), orte_gpr_replica_itag_t, k)) { - orte_value_array_remove_item(&(cptr->itaglist), k); - goto MOVEON; - } - } - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - -MOVEON: - OBJ_RELEASE(iptr); - /* remove the entry from the container's itagval array */ - orte_pointer_array_set_item(cptr->itagvals, j, NULL); - (cptr->num_itagvals)--; - } - } - - /* now add new item in their place */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_add_keyval(&iptr, seg, cptr, kptr))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* record that we did this */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_record_action(seg, cptr, iptr, - ORTE_GPR_REPLICA_ENTRY_CHANGED | - ORTE_GPR_REPLICA_ENTRY_CHG_TO))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - - /* update any storage locations that were pointing to these items */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_update_storage_locations(iptr))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* return the location of the new iptr */ - *iptr2 = iptr; - - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_search_container(orte_gpr_replica_addr_mode_t addr_mode, - orte_gpr_replica_itag_t *itags, orte_std_cntr_t num_itags, - orte_gpr_replica_container_t *cptr) -{ - orte_gpr_replica_itagval_t **ptr; - orte_std_cntr_t i, j, index; - - OPAL_TRACE(3); - - /* ensure the search array is clear */ - orte_pointer_array_clear(orte_gpr_replica_globals.srch_ival); - orte_gpr_replica_globals.num_srch_ival = 0; - - /* check list of itags in container to see if there is a match according - * to addr_mode spec - */ - if (orte_gpr_replica_check_itag_list(addr_mode, num_itags, itags, - orte_value_array_get_size(&(cptr->itaglist)), - ORTE_VALUE_ARRAY_GET_BASE(&(cptr->itaglist), orte_gpr_replica_itag_t))) { - /* there is! 
so now collect those values into the search array */ - ptr = (orte_gpr_replica_itagval_t**)((cptr->itagvals)->addr); - for (i=0, j=0; j < cptr->num_itagvals && - i < (cptr->itagvals)->size; i++) { - if (NULL != ptr[i]) { - j++; - if (orte_gpr_replica_check_itag_list(ORTE_GPR_REPLICA_OR, - num_itags, itags, - 1, &(ptr[i]->itag))) { - - if (0 > orte_pointer_array_add(&index, orte_gpr_replica_globals.srch_ival, ptr[i])) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - orte_pointer_array_clear(orte_gpr_replica_globals.srch_ival); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (orte_gpr_replica_globals.num_srch_ival)++; - } - } - } - } - - return ORTE_SUCCESS; -} - - -bool orte_gpr_replica_value_in_container(orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itagval_t *iptr) -{ - orte_gpr_replica_itagval_t **ptr; - orte_std_cntr_t i, j; - - ptr = (orte_gpr_replica_itagval_t**)((cptr->itagvals)->addr); - for (i=0, j=0; j < cptr->num_itagvals && - i < (cptr->itagvals)->size; i++) { - if (NULL != ptr[i]) { - j++; - if ((ptr[i]->itag == iptr->itag) && (ptr[i]->value->type == iptr->value->type)) { - if (ORTE_EQUAL == orte_dss.compare(ptr[i]->value->data, iptr->value->data, iptr->value->type)) { - return true; - } - } - } - } - - return false; -} - -int orte_gpr_replica_release_segment(orte_gpr_replica_segment_t **seg) -{ - int rc; - orte_std_cntr_t i; - - OPAL_TRACE(3); - - i = (*seg)->itag; - OBJ_RELEASE(*seg); - - if (0 > (rc = orte_pointer_array_set_item(orte_gpr_replica.segments, i, NULL))) { - return rc; - } - (orte_gpr_replica.num_segs)--; - - return ORTE_SUCCESS; -} - -int orte_gpr_replica_purge_itag(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_itag_t itag) -{ - OPAL_TRACE(3); - - /* - * Begin by looping through the segment's containers and check - * their descriptions first - if removing this name leaves that - * list empty, then remove the container. 
- * If the container isn't to be removed, then loop through all - * the container's keyvalue pairs and check the "key" - if - * it matches, then remove that pair. If all pairs are removed, - * then remove the container - * */ - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_subscribe_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_subscribe_fn.c deleted file mode 100644 index 39f1487ac1..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_subscribe_fn.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - -int orte_gpr_replica_subscribe_fn(orte_process_name_t *requestor, - orte_std_cntr_t num_subs, - orte_gpr_subscription_t **subscriptions, - orte_std_cntr_t num_trigs, - orte_gpr_trigger_t **trigs) -{ - orte_gpr_replica_subscription_t *sub=NULL, **subs, **trigsubs; - orte_gpr_replica_trigger_t *trig=NULL; - orte_std_cntr_t i, j, k, m, n, index; - bool ignore; - int rc=ORTE_SUCCESS; - - OPAL_TRACE(2); - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "%s gpr_replica_subscribe: entered with num_trigs:%d", - ORTE_NAME_PRINT(orte_process_info.my_name), num_trigs); - } - - /* ensure one of the search arrays is clear - in this case, we - * use the sub_ptrs array to temporarily store the subscription pointers so we - * can properly link them to the triggers - */ - orte_pointer_array_clear(orte_gpr_replica_globals.sub_ptrs); - subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica_globals.sub_ptrs)->addr; - - for (i=0; i < num_subs; i++) { - if (ORTE_SUCCESS != (rc = - orte_gpr_replica_register_subscription(&sub, requestor, subscriptions[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* add the new subscription so we can link - * it to the triggers later - */ - if (0 > orte_pointer_array_add(&index, orte_gpr_replica_globals.sub_ptrs, sub)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - - /* now register any triggers */ - for (i=0; i < num_trigs; i++) { - if (ORTE_SUCCESS != (rc = - orte_gpr_replica_register_trigger(&trig, requestor, trigs[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* link the subscriptions to the new trigger. 
only do this if the - * subscription doesn't already exist on this trigger - otherwise, - * we'd just be duplicating things. - */ - trigsubs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr; - for (j=0, k=0; k < num_subs && - j < (orte_gpr_replica_globals.sub_ptrs)->size; j++) { - if (NULL != subs[j]) { - k++; - /* check to see if this subscription is already attached - * to this trigger - if not, add it - */ - ignore = false; - for (m=0, n=0; n < trig->num_subscriptions && - m < (trig->subscriptions)->size; m++) { - if (NULL != trigsubs[m]) { - n++; - if (subs[j] == trigsubs[m]) { /* already present */ - ignore = true; - } - } - } - if (!ignore) { /* new sub for this trig - add it */ - if (0 > orte_pointer_array_add(&index, trig->subscriptions, subs[j])) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (trig->num_subscriptions)++; - } - } - } - } - - return rc; -} diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_trig_ops_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_trig_ops_fn.c deleted file mode 100644 index 25747c5783..0000000000 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_trig_ops_fn.c +++ /dev/null @@ -1,1344 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - support functions. 
- * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/mca/gpr/replica/api_layer/gpr_replica_api.h" -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" - - -/* - * GENERAL REGISTRY TRIGGER FUNCTIONS - */ -int -orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr, - orte_process_name_t *requestor, - orte_gpr_subscription_t *subscription) -{ - int rc; - orte_std_cntr_t i, j, k, num_tokens, num_keys; - orte_gpr_replica_subscription_t *sub, **subs; - orte_gpr_replica_requestor_t *req, **reqs; - orte_gpr_replica_addr_mode_t tok_mode, key_mode; - orte_gpr_replica_itag_t itag, *tokentags=NULL; - orte_gpr_replica_ivalue_t *ival; - - OPAL_TRACE(3); - - /* if this is a named subscription, see if that name has - * already been entered on the replica. If it has, then we - * simply attach this recipient to that subscription - - * this indicates that this recipient would also like a - * copy of the data generated by that subscription - */ - if (NULL != subscription->name) { - /* look for this name on current list */ - subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr; - for (k=0, j=0; j < orte_gpr_replica.num_subs && - k < (orte_gpr_replica.subscriptions)->size; k++) { - if (NULL != subs[k]) { - j++; - if (NULL != subs[k]->name && NULL != subscription->name && - 0 == strcmp(subs[k]->name, subscription->name)) { - /* found name on list - add another recipient to that - * subscription - */ - sub = subs[k]; - goto ADDREQ; - } - } - } - } - - /* Either this is NOT a named subscription, or it is named - * but that name is NOT on the current list of subscriptions. - * Either way, we add this subscription to the replica's list. 
- * - * NOTE that you CANNOT add yourself as a recipient to a non-named - * subscription - even if all the subscription specifications are - * identical. This is done in the interest of speed as checking - * all the specifications would take some time. Subscriptions are - * "named" because they are intended to be used by multiple processes. - * Un-named subscriptions are, therefore, assumed to be specialty - * subscriptions that do not merit such consideration. - */ - - /* see if another subscription is available on the system */ - if (ORTE_GPR_SUBSCRIPTION_ID_MAX-1 < orte_gpr_replica.num_subs) { /* none left! */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - sub = OBJ_NEW(orte_gpr_replica_subscription_t); - if (NULL == sub) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - sub->idtag = orte_gpr_replica.num_subs; - - if (NULL != subscription->name) { - sub->name = strdup(subscription->name); - } - sub->action = subscription->action; - if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & sub->action) { - sub->active = false; - } else { - sub->active = true; - } - - /* store all the data specifications for this subscription */ - for (i=0; i < subscription->cnt; i++) { - ival = OBJ_NEW(orte_gpr_replica_ivalue_t); - if (NULL == ival) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(sub); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* find and store the segment */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&(ival->seg), true, - subscription->values[i]->segment))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(sub); - OBJ_RELEASE(ival); - return rc; - } - tok_mode = ORTE_GPR_REPLICA_TOKMODE((subscription->values[i])->addr_mode); - if (0x00 == tok_mode) { /* default token address mode to AND */ - subscription->values[i]->addr_mode = subscription->values[i]->addr_mode | ORTE_GPR_TOKENS_AND; - } - key_mode = ORTE_GPR_REPLICA_KEYMODE((subscription->values[i])->addr_mode); - if (0x00 == key_mode) { /* 
default key address mode to OR */ - subscription->values[i]->addr_mode = subscription->values[i]->addr_mode | ORTE_GPR_KEYS_OR; - key_mode = (orte_gpr_replica_addr_mode_t)subscription->values[i]->addr_mode; - } - ival->addr_mode = ORTE_GPR_REPLICA_REMOVE_OVERWRITE(subscription->values[i]->addr_mode); - - if (NULL != subscription->values[i]->tokens && - 0 < subscription->values[i]->num_tokens) { - num_tokens = subscription->values[i]->num_tokens; /* indicates non-NULL terminated list */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&tokentags, ival->seg, - subscription->values[i]->tokens, &num_tokens))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(sub); - OBJ_RELEASE(ival); - return rc; - - } - if (ORTE_SUCCESS != (rc = orte_value_array_set_size(&(ival->tokentags), (orte_std_cntr_t)num_tokens))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(sub); - OBJ_RELEASE(ival); - return rc; - } - for (j=0; j < num_tokens; j++) { - ORTE_VALUE_ARRAY_SET_ITEM(&(ival->tokentags), orte_gpr_replica_itag_t, - j, tokentags[j]); - } - free(tokentags); - tokentags = NULL; - } - - if (NULL != subscription->values[i]->keyvals && - 0 < subscription->values[i]->cnt) { - num_keys = subscription->values[i]->cnt; - if (ORTE_SUCCESS != (rc = orte_value_array_set_size(&(ival->keytags), num_keys))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(sub); - OBJ_RELEASE(ival); - return rc; - } - for (j=0; j < num_keys; j++) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_itag(&itag, - ival->seg, - subscription->values[i]->keyvals[j]->key))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(sub); - OBJ_RELEASE(ival); - return rc; - } - ORTE_VALUE_ARRAY_SET_ITEM(&(ival->keytags), orte_gpr_replica_itag_t, - j, itag); - } - } - /* add the object to the subscription's value pointer array */ - if (0 > (rc = orte_pointer_array_add(&(ival->index), sub->values, ival))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(sub); - OBJ_RELEASE(ival); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (sub->num_values)++; - } - /* add 
the object to the replica's subscriptions pointer array */ - if (0 > (rc = orte_pointer_array_add(&(sub->index), orte_gpr_replica.subscriptions, sub))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(sub); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (orte_gpr_replica.num_subs)++; - -ADDREQ: - /* see if this requestor and subscription id is already attached to - * this subscription - if so, ignore it to avoid duplicates - */ - reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr; - for (i=0, j=0; j < sub->num_requestors && - i < (sub->requestors)->size; i++) { - if (NULL != reqs[i]) { - j++; - if ((NULL == reqs[i]->requestor && NULL != requestor) || - (NULL != reqs[i]->requestor && NULL == requestor)) { - continue; - } - if (reqs[i]->idtag == subscription->id && - ((NULL == reqs[i]->requestor && NULL == requestor) || - (ORTE_EQUAL == orte_dss.compare(reqs[i]->requestor, requestor, ORTE_NAME)))) { - /* found this requestor - do not add it again */ - goto DONESUB; - } - } - } - - /* get here if requestor is not already on this subscription - * add this requestor to the subscription - */ - req = OBJ_NEW(orte_gpr_replica_requestor_t); - if (NULL == req) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (NULL != requestor) { - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(req->requestor), requestor, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else { - req->requestor = NULL; - } - - if (0 > (rc = orte_pointer_array_add(&(req->index), sub->requestors, req))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (sub->num_requestors)++; - - /* store the requestor's subscription id so they can ask - * us to cancel their subscription at a later time, - * if they choose to do so, and so that we can tell - * them which callback function to use when we send - * them a datagram - */ - req->idtag = subscription->id; - - /* - * New subscription, send initial values? 
- */ - if(sub->active && subscription->action & ORTE_GPR_NOTIFY_PRE_EXISTING) { - - if(ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(sub, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } -DONESUB: - /* record where the subscription went */ - *subptr = sub; - - return ORTE_SUCCESS; -} - -int -orte_gpr_replica_register_trigger(orte_gpr_replica_trigger_t **trigptr, - orte_process_name_t *requestor, - orte_gpr_trigger_t *trigger) -{ - orte_gpr_replica_trigger_t *trig, **trigs; - int rc; - orte_std_cntr_t i, j, k, m, num_tokens, index; - orte_gpr_replica_addr_mode_t tok_mode, key_mode; - orte_gpr_replica_segment_t *seg; - orte_gpr_replica_container_t **cptr, *cptr2; - orte_gpr_replica_itag_t itag, *tokentags=NULL; - orte_gpr_replica_itagval_t *iptr; - orte_gpr_replica_counter_t *cntr; - orte_gpr_replica_trigger_requestor_t *req, **reqs; - bool found; - - OPAL_TRACE(3); - - /* set a default response value */ - *trigptr = NULL; - - /* if this is a named trigger, see if that name has - * already been entered on the replica. If it has, then we - * can simply return the pointer to the existing trigger. - */ - if (NULL != trigger->name) { - /* look for this name on current list */ - trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr; - for (k=0, j=0; j < orte_gpr_replica.num_trigs && - k < (orte_gpr_replica.triggers)->size; k++) { - if (NULL != trigs[k]) { - j++; - if (trigs[k]->name && NULL != trigger->name && - 0 == strcmp(trigs[k]->name, trigger->name)) { - /* found name on list - add recipient's - * trigger info to that trigger - */ - trig = trigs[k]; - goto ADDREQ; - } - } - } - } - - /* Either this is NOT a named trigger, or it is named - * but that name is NOT on the current list of triggers. - * Either way, we add this trigger to the replica's list. - * - * NOTE that you CANNOT add a subscription to a pre-entered non-named - * trigger - even if all the trigger specifications are - * identical. 
This is done in the interest of speed as checking - * all the specifications would take some time. Triggers are - * "named" because they are intended to be used by multiple processes. - * Un-named triggers are, therefore, assumed to be specialty - * triggers that do not merit such consideration. - */ - - /* see if another trigger is available */ - if (ORTE_GPR_TRIGGER_ID_MAX-1 < orte_gpr_replica.num_trigs) { /* none left! */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - trig = OBJ_NEW(orte_gpr_replica_trigger_t); - if (NULL == trig) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - trig->idtag = orte_gpr_replica.num_trigs; - - /* if a name for this trigger has been provided, copy it over */ - if (NULL != trigger->name) { - trig->name = strdup(trigger->name); - } - /* copy the action field */ - trig->action = trigger->action; - - /* put this trigger on the replica's list */ - if (0 > (rc = orte_pointer_array_add(&(trig->index), orte_gpr_replica.triggers, trig))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (orte_gpr_replica.num_trigs)++; - - /* locate and setup the trigger's counters */ - for (i=0; i < trigger->cnt; i++) { - /* get this counter's addressing modes */ - tok_mode = ORTE_GPR_REPLICA_TOKMODE((trigger->values[i])->addr_mode); - if (0x00 == tok_mode) { /* default token address mode to AND */ - tok_mode = ORTE_GPR_REPLICA_AND; - } - key_mode = ORTE_GPR_REPLICA_KEYMODE((trigger->values[i])->addr_mode); - if (0x00 == key_mode) { /* default key address mode to OR */ - key_mode = ORTE_GPR_REPLICA_OR; - } - - /* locate this counter's segment - this is where the counter will be */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, - trigger->values[i]->segment))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); - return rc; - } - - /* convert the counter's tokens to an itaglist */ - if (NULL != 
(trigger->values[i])->tokens && - 0 < (trigger->values[i])->num_tokens) { - num_tokens = (trigger->values[i])->num_tokens; /* indicates non-NULL terminated list */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&tokentags, seg, - (trigger->values[i])->tokens, &num_tokens))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - - /* find the specified container(s) */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_containers(seg, tok_mode, - tokentags, num_tokens))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - - if (0 == orte_gpr_replica_globals.num_srch_cptr) { - /* no existing container found - create one using all the tokens */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_container(&cptr2, seg, - num_tokens, tokentags))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - - /* ok, store all of this counter's values in the new container, adding a pointer to each - * one in the trigger's counter array - */ - for (j=0; j < (trigger->values[i])->cnt; j++) { - if (ORTE_SUCCESS != (rc = - orte_gpr_replica_add_keyval(&iptr, seg, cptr2, - (trigger->values[i])->keyvals[j]))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - cntr = OBJ_NEW(orte_gpr_replica_counter_t); - if (NULL == cntr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - cntr->seg = seg; - cntr->cptr = cptr2; - cntr->iptr = iptr; - /* if the trigger is at a level, then the requestor MUST specify the - * level in the provided keyval. 
Otherwise, we only need to store - * the iptr since we will be comparing levels between multiple - * counters - */ - if (trigger->action & ORTE_GPR_TRIG_AT_LEVEL) { - if (NULL == trigger->values[i]->keyvals) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - goto CLEANUP; - } - cntr->trigger_level.value = OBJ_NEW(orte_data_value_t); - if (NULL == cntr->trigger_level.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - cntr->trigger_level.value->type = ((trigger->values[i])->keyvals[j])->value->type; - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((cntr->trigger_level.value)->data), - ((trigger->values[i])->keyvals[j])->value->data, - ((trigger->values[i])->keyvals[j])->value->type))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - if (0 > orte_pointer_array_add(&index, trig->counters, cntr)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - } - trig->num_counters += (trigger->values[i])->cnt; - } else { /* For each counter, go through the list of containers and - see if it already exists in container. Only allow each - counter to be identified once - error if either a counter is never - found or already existing in more than one place. 
*/ - cptr = (orte_gpr_replica_container_t**)(orte_gpr_replica_globals.srch_cptr)->addr; - for (j=0; j < (trigger->values[i])->cnt; j++) { - found = false; - if (ORTE_SUCCESS != orte_gpr_replica_dict_lookup(&itag, seg, - ((trigger->values[i])->keyvals[j])->key)) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - for (k=0, m=0; m < orte_gpr_replica_globals.num_srch_cptr && - k < (orte_gpr_replica_globals.srch_cptr)->size; k++) { - if (NULL != cptr[k]) { - m++; - if (ORTE_SUCCESS == orte_gpr_replica_search_container( - ORTE_GPR_REPLICA_OR, - &itag, 1, cptr[k]) && - 0 < orte_gpr_replica_globals.num_srch_ival) { - /* this key already exists - make sure it's unique - */ - if (1 < orte_gpr_replica_globals.num_srch_ival || found) { - /* not unique - error out */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - goto CLEANUP; - } - /* okay, add to trigger's counter array */ - found = true; - iptr = (orte_gpr_replica_itagval_t*)((orte_gpr_replica_globals.srch_ival)->addr[0]); - cntr = OBJ_NEW(orte_gpr_replica_counter_t); - if (NULL == cntr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - cntr->seg = seg; - cntr->cptr = cptr[k]; - cntr->iptr = iptr; - /* if the trigger is at a level, then the requestor MUST specify the - * level in the provided keyval. 
Otherwise, we only need to store - * the iptr since we will be comparing levels between multiple - * counters - */ - if (trigger->action & ORTE_GPR_TRIG_AT_LEVEL) { - if (NULL == trigger->values[i]->keyvals) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - goto CLEANUP; - } - cntr->trigger_level.value = OBJ_NEW(orte_data_value_t); - if (NULL == cntr->trigger_level.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - cntr->trigger_level.value->type = ((trigger->values[i])->keyvals[j])->value->type; - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((cntr->trigger_level.value)->data), - ((trigger->values[i])->keyvals[j])->value->data, - ((trigger->values[i])->keyvals[j])->value->type))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - if (0 > orte_pointer_array_add(&index, trig->counters, cntr)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - (trig->num_counters)++; - } /* end if found */ - } /* end if cptr NULL */ - } /* end for k */ - if (!found) { /* specified counter never found - error */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - goto CLEANUP; - } /* end if found */ - } /* end for j */ - } /* end if/else container found */ - } /* end for i */ - -ADDREQ: - /* see if this requestor and trigger id is already attached to - * this trigger - if so, ignore it to avoid duplicates - */ - reqs = (orte_gpr_replica_trigger_requestor_t**)(trig->attached)->addr; - for (i=0, j=0; j < trig->num_attached && - i < (trig->attached)->size; i++) { - if (NULL != reqs[i]) { - j++; - /* if one is NULL and the other isn't, then they can't possibly match */ - if ((NULL == reqs[i]->requestor && NULL != requestor) || - (NULL != reqs[i]->requestor && NULL == requestor)) { - continue; - } - if (reqs[i]->idtag == trigger->id && - ((NULL == reqs[i]->requestor && NULL == requestor) || - (ORTE_EQUAL == orte_dss.compare(reqs[i]->requestor, requestor, 
ORTE_NAME)))) { - /* found this requestor - do not add it again */ - goto DONETRIG; - } - } - } - - /* add this requestor to the trigger's list of "attached" callers */ - req = OBJ_NEW(orte_gpr_replica_trigger_requestor_t); - if (NULL == req) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (NULL != requestor) { - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(req->requestor), requestor, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else { - req->requestor = NULL; - } - - if (0 > (rc = orte_pointer_array_add(&(req->index), trig->attached, req))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - (trig->num_attached)++; - - /* store the requestor's trigger id so they can ask - * us to cancel their subscription at a later time, - * if they choose to do so. - */ - req->idtag = trigger->id; - - /* see if the ROUTE_DATA_TO_ME flag is set. This indicates - * that the requestor wants all data sent to them and - * is assuming all responsibility for properly routing - * the data - */ - if (ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME & trig->action) { - if (NULL == trig->master) { - /* someone already requested this responsibility. - * if I'm a singleton, this is NOT an error - the - * initial "launch" has recorded the stage gate - * triggers using the [-1,-1,-1] name, so we need to - * overwrite that with my name so I get the notifications. 
- */ -#if 0 - if (orte_process_info.singleton || orte_process_info.seed) { -opal_output(0, "Trigger master being redefined"); - trig->master = req; - } else { - /* if i'm not a singleton, then this is an error - report it */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE); - } - } else { -#endif - trig->master = req; - } - } - -DONETRIG: - /* report the location of this trigger */ - *trigptr = trig; - - /* record that we had success */ - rc = ORTE_SUCCESS; - -CLEANUP: - if (NULL != tokentags) { - free(tokentags); - } - - return rc; -} - -/* - * Remove a subscription from the system. Note that the requestor only - * knows their local subscription id, so that is what has been provided. - * We need to find the specified combination of requestor and - * subscription id, and then delete it - */ -int -orte_gpr_replica_remove_subscription(orte_process_name_t *requestor, - orte_gpr_subscription_id_t id) -{ - orte_gpr_replica_subscription_t **subs, *sub; - orte_gpr_replica_requestor_t **reqs, *req; - orte_gpr_replica_trigger_t **trigs; - orte_std_cntr_t i, j, k, m; - bool found; - - OPAL_TRACE(3); - - /* find this subscription on the list */ - subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_subs && - i < (orte_gpr_replica.subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - reqs = (orte_gpr_replica_requestor_t**)(subs[i]->requestors)->addr; - for (k=0, m=0; m < subs[i]->num_requestors && - k < (subs[i]->requestors)->size; k++) { - if (NULL != reqs[k]) { - m++; - if (id == reqs[k]->idtag && - ((NULL == requestor && NULL == reqs[k]->requestor) || - (NULL != requestor && NULL != reqs[k]->requestor && - ORTE_EQUAL == orte_dss.compare(reqs[k]->requestor, requestor, ORTE_NAME)))) { - /* this is the subscription */ - sub = subs[i]; - req = reqs[k]; - goto PROCESS; - } - } - } - } - } - /* if we arrive here, then we were - * unable to find a matching subscription. 
report that fact - * and exit - */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - -PROCESS: - /* remove the specified requestor. if this was the last - * requestor on this subscription, remove the subscription - * as well - */ - /* must release the requestor object PRIOR to setting - * the indexed location to NULL or we lose the pointer - */ - i = req->index; - OBJ_RELEASE(req); - orte_pointer_array_set_item(sub->requestors, i, NULL); - (sub->num_requestors)--; - if (0 == sub->num_requestors) { /* nobody left */ - /* NOTE: cannot release sub here as we still need the - * object so we can check for it in the list of triggers - */ - orte_pointer_array_set_item(orte_gpr_replica.subscriptions, sub->index, NULL); - (orte_gpr_replica.num_subs)--; - } - - /* check for this subscription throughout the list of triggers - * and remove it wherever found - */ - trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trigs[i]) { - j++; - found = false; - subs = (orte_gpr_replica_subscription_t**)(trigs[i]->subscriptions)->addr; - for (k=0, m=0; !found && m < trigs[i]->num_subscriptions && - k < (trigs[i]->subscriptions)->size; k++) { - if (NULL != subs[k]) { - m++; - if (sub == subs[k]) { /* match found */ - orte_pointer_array_set_item(trigs[i]->subscriptions, k, NULL); - (trigs[i]->num_subscriptions)--; - /* if that was the last subscription on this trigger, then - * remove the trigger - not needed any more - */ - if (0 == trigs[i]->num_subscriptions) { - OBJ_RELEASE(trigs[i]); - orte_pointer_array_set_item(orte_gpr_replica.triggers, i, NULL); - } - found = true; - } - } - } - } - } - /* done with sub, so now can release it if we need to do so */ - if (0 == sub->num_requestors) OBJ_RELEASE(sub); - - /* ALL DONE! */ - return ORTE_SUCCESS; -} - -/* - * Remove a trigger from the system. 
Note that the requestor only - * knows their local trigger id, so that is what has been provided. - * We need to find the specified combination of requestor and - * trigger id, and then delete it - */ -int -orte_gpr_replica_remove_trigger(orte_process_name_t *requestor, - orte_gpr_trigger_id_t id) -{ - orte_gpr_replica_subscription_t **subs; - orte_gpr_replica_trigger_requestor_t **reqs, *req; - orte_gpr_replica_trigger_t **trigs, *trig; - orte_std_cntr_t i, j, k, m; - - OPAL_TRACE(3); - - /* find this trigger on the list */ - trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trigs[i]) { - j++; - reqs = (orte_gpr_replica_trigger_requestor_t**)(trigs[i]->attached)->addr; - for (k=0, m=0; m < trigs[i]->num_attached && - k < (trigs[i]->attached)->size; k++) { - if (NULL != reqs[k]) { - m++; - if (id == reqs[k]->idtag && - ((NULL == requestor && NULL == reqs[k]->requestor) || - (NULL != requestor && NULL != reqs[k]->requestor && - ORTE_EQUAL == orte_dss.compare(reqs[k]->requestor, requestor, ORTE_NAME)))) { - /* this is the trigger */ - trig = trigs[i]; - req = reqs[k]; - goto PROCESS; - } - } - } - } - } - /* if we arrive here, then we had a remote requestor but were - * unable to find a matching trigger. report that fact - * and exit - */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - -PROCESS: - /* remove the specified requestor. 
if this was the last - * requestor on this trigger, remove the trigger - * as well - */ - /* must release the requestor object PRIOR to setting - * the indexed location to NULL or we lose the pointer - */ - i = req->index; - OBJ_RELEASE(req); - orte_pointer_array_set_item(trig->attached, i, NULL); - (trig->num_attached)--; - if (0 == trig->num_attached) { /* nobody left */ - /* NOTE: cannot release trig here as we still need the - * object so we can clear any attached subscriptions - */ - orte_pointer_array_set_item(orte_gpr_replica.triggers, trig->index, NULL); - (orte_gpr_replica.num_trigs)--; - } - - /* now need to check any attached subscriptions. if the subscription - * was flagged to be deleted after the trigger fired, or was flagged - * to only start once the trigger had fired, then we need - * to delete it here. otherwise, we leave the subscription alone. - */ - subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr; - for (i=0, j=0; j < trig->num_subscriptions && - i < (trig->subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & subs[i]->action || - ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & subs[i]->action) { - OBJ_RELEASE(subs[i]); - } - } - } - - /* done processing trigger - can release it now, if we need to do so */ - if (0 == trig->num_attached) OBJ_RELEASE(trig); - - /* ALL DONE! 
*/ - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_record_action(orte_gpr_replica_segment_t *seg, - orte_gpr_replica_container_t *cptr, - orte_gpr_replica_itagval_t *iptr, - orte_gpr_replica_action_t action) -{ - orte_gpr_replica_action_taken_t *new_action; - orte_std_cntr_t index; - int rc; - - OPAL_TRACE(3); - - new_action = OBJ_NEW(orte_gpr_replica_action_taken_t); - if (NULL == new_action) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - new_action->action = action; - - /* store pointers to the affected itagval */ - new_action->seg = seg; - new_action->cptr = cptr; - new_action->iptr = iptr; - - /* "retain" ALL of the respective objects so they can't disappear until - * after we process the actions - */ - OBJ_RETAIN(seg); - OBJ_RETAIN(cptr); - OBJ_RETAIN(iptr); - - /* add the new action record to the array */ - if (0 > (rc = orte_pointer_array_add(&index, orte_gpr_replica_globals.acted_upon, new_action))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* increment the number acted upon */ - (orte_gpr_replica_globals.num_acted_upon)++; - - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_update_storage_locations(orte_gpr_replica_itagval_t *new_iptr) -{ - orte_gpr_replica_trigger_t **trig; - orte_gpr_replica_counter_t **cntrs; - orte_gpr_replica_itagval_t **old_iptrs; - orte_std_cntr_t i, j, k, m, n, p; - bool replaced; - - OPAL_TRACE(3); - - trig = (orte_gpr_replica_trigger_t**)((orte_gpr_replica.triggers)->addr); - for (i=0, m=0; m < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trig[i]) { - m++; - cntrs = (orte_gpr_replica_counter_t**)((trig[i]->counters)->addr); - for (j=0, n=0; n < trig[i]->num_counters && - j < (trig[i]->counters)->size; j++) { - if (NULL != cntrs[j]) { - n++; - old_iptrs = (orte_gpr_replica_itagval_t**)((orte_gpr_replica_globals.srch_ival)->addr); - for (k=0, p=0; p < orte_gpr_replica_globals.num_srch_ival && - 
k < (orte_gpr_replica_globals.srch_ival)->size; k++) { - replaced = false; - if (NULL != old_iptrs[k]) { - p++; - if (old_iptrs[k] == cntrs[j]->iptr) { - if (NULL == new_iptr || replaced) { - orte_pointer_array_set_item(trig[i]->counters, j, NULL); - (trig[i]->num_counters)--; - } else if (!replaced) { - cntrs[j]->iptr = new_iptr; - replaced = true; - } - } - } - } - } - } - } - } - return ORTE_SUCCESS; -} - - -int orte_gpr_replica_check_events(void) -{ - orte_gpr_replica_trigger_t **trigs; - orte_gpr_replica_subscription_t **subs; - orte_gpr_replica_action_taken_t **ptr; - orte_std_cntr_t i, j; - int rc; - - OPAL_TRACE(3); - - /* we first check all the subscriptions to see if any are "active". - * this needs to be done BEFORE we check triggers to ensure that - * triggers that turn "on" a subscription don't cause duplicate - * messages to their requestor - */ - subs = (orte_gpr_replica_subscription_t**)((orte_gpr_replica.subscriptions)->addr); - for (i=0, j=0; j < orte_gpr_replica.num_subs && - i < (orte_gpr_replica.subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - if (subs[i]->active) { - /* this is an active subscription - check to see if - * any of the recorded actions match its specified - * conditions and process it if so - */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_check_subscription(subs[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } /* if notify */ - } - } - - /* check for triggers that might have fired. - * NOTE: MUST DO THIS *AFTER* THE NOTIFY CHECK. If the trigger was - * set to start notifies after firing, then checking notifies - * AFTER the triggers were processed causes the notification to - * be sent twice. 
- */ - trigs = (orte_gpr_replica_trigger_t**)((orte_gpr_replica.triggers)->addr); - for (i=0, j=0; j < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trigs[i] && !trigs[i]->processing) { - j++; - /* check the trigger */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_check_trig(trigs[i]))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } /* if trig not NULL */ - } - - /* clean up the action record. The recorded actions from a given - * call into the registry are only needed through the "check_events" - * function call. - */ - ptr = (orte_gpr_replica_action_taken_t**)((orte_gpr_replica_globals.acted_upon)->addr); - for (i=0, j=0; j < orte_gpr_replica_globals.num_acted_upon && - i < (orte_gpr_replica_globals.acted_upon)->size; i++) { - if (NULL != ptr[i]) { - j++; - OBJ_RELEASE(ptr[i]); - } - } - orte_gpr_replica_globals.num_acted_upon = 0; - - return ORTE_SUCCESS; -} - -/* - * Check a trigger to see if it has fired based on the current - * state of its counters - */ -int orte_gpr_replica_check_trig(orte_gpr_replica_trigger_t *trig) -{ - orte_gpr_replica_subscription_t **subs; - orte_gpr_replica_counter_t **cntr; - orte_gpr_replica_itagval_t *base_value=NULL; - orte_data_type_t base_type = ORTE_UNDEF; - bool first, fire; - orte_std_cntr_t i, j; - int rc; - - OPAL_TRACE(3); - - if (ORTE_GPR_TRIG_CMP_LEVELS & trig->action) { /* compare the levels of the counters */ - cntr = (orte_gpr_replica_counter_t**)((trig->counters)->addr); - first = true; - fire = true; - for (i=0, j=0; j < trig->num_counters && - i < (trig->counters)->size && fire; i++) { - if (NULL != cntr[i]) { - j++; - if (first) { - base_value = cntr[i]->iptr; - base_type = cntr[i]->iptr->value->type; - first = false; - } else { - if (base_type != cntr[i]->iptr->value->type) { - ORTE_ERROR_LOG(ORTE_ERR_COMPARE_FAILURE); - return ORTE_ERR_COMPARE_FAILURE; - } - if (ORTE_EQUAL != orte_dss.compare(base_value->value->data, cntr[i]->iptr->value->data, base_type)) { - fire = 
false; - } - } - } - } - if (fire) { /* all levels were equal */ - goto FIRED; - } - return ORTE_SUCCESS; - - } else if (ORTE_GPR_TRIG_AT_LEVEL & trig->action) { /* see if counters are at a level */ - cntr = (orte_gpr_replica_counter_t**)((trig->counters)->addr); - fire = true; - for (i=0, j=0; j < trig->num_counters && - i < (trig->counters)->size && fire; i++) { - if (NULL != cntr[i]) { - j++; - if (cntr[i]->iptr->value->type != cntr[i]->trigger_level.value->type) { - ORTE_ERROR_LOG(ORTE_ERR_COMPARE_FAILURE); - return ORTE_ERR_COMPARE_FAILURE; - } - if (ORTE_EQUAL != orte_dss.compare(cntr[i]->iptr->value->data, - cntr[i]->trigger_level.value->data, - cntr[i]->iptr->value->type)) { - fire = false; - } - } - } - if (fire) { /* all counters at specified trigger level */ - goto FIRED; - } - return ORTE_SUCCESS; - } - - return ORTE_SUCCESS; /* neither cmp nor at level set */ - -FIRED: - /* if this trigger wants everything routed through a "master", then we register - * this as a trigger_callback. 
- */ - if (NULL != trig->master) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_trigger_callback(trig))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* for each subscription assocated with this trigger, check to see if - * the subscription needs any special treatment - */ - subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr; - for (i=0, j=0; j < trig->num_subscriptions && - i < (trig->subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - /* if ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG set, set the subscription - * "active" to indicate that trigger fired - */ - if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & subs[i]->action) { - subs[i]->active = true; - } - /* if ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG set, then set the flag - * so it can be cleaned up later - */ - if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & subs[i]->action) { - subs[i]->cleanup = true; - } - } - } - } else { - /* for each subscription associated with this trigger, we need to - * register a callback to the requestor that returns the specified - * data - */ - subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr; - for (i=0, j=0; j < trig->num_subscriptions && - i < (trig->subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(subs[i], NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* if ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG set, set the subscription - * "active" to indicate that trigger fired - */ - if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & subs[i]->action) { - subs[i]->active = true; - } - /* if ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG set, then set the flag - * so it can be cleaned up later - */ - if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & subs[i]->action) { - subs[i]->cleanup = true; - } - } - } - } - - - /* set the processing flag so we don't go into infinite loop if - * any callback functions modify the registry - */ - trig->processing = true; - - /* if this trigger was a one-shot, set flag to indicate it has 
fired - * so it can be cleaned up later - */ - if (ORTE_GPR_TRIG_ONE_SHOT & trig->action) { - trig->one_shot_fired = true; - } - - return ORTE_SUCCESS; -} - -/* - * Check subscriptions to see if any were fired by any of the - * recorded actions that have occurred on the registry. - */ -int orte_gpr_replica_check_subscription(orte_gpr_replica_subscription_t *sub) -{ - orte_gpr_replica_action_taken_t **ptr; - orte_std_cntr_t i, j, k; - orte_gpr_value_t *value; - orte_gpr_addr_mode_t addr_mode; - int rc=ORTE_SUCCESS; - - OPAL_TRACE(3); - - /* When entering this function, we know that the specified - * subscription is active since that was tested above. What we now need - * to determine is whether or not any of the data - * objects pointed to by the subscription were involved in a change. The - * subscription could describe a container - e.g., the subscriber might want to know - * if anything gets added to a container - or could be a container plus one or - * more keys when the subscriber wants to know when a specific value gets changed. 
- */ - ptr = (orte_gpr_replica_action_taken_t**)((orte_gpr_replica_globals.acted_upon)->addr); - for (i=0, k=0; k < orte_gpr_replica_globals.num_acted_upon && - i < (orte_gpr_replica_globals.acted_upon)->size; i++) { - if (NULL != ptr[i]) { - k++; - if ( - (((sub->action & ORTE_GPR_NOTIFY_ADD_ENTRY) && - (ptr[i]->action & ORTE_GPR_REPLICA_ENTRY_ADDED)) || - - ((sub->action & ORTE_GPR_NOTIFY_DEL_ENTRY) && - (ptr[i]->action & ORTE_GPR_REPLICA_ENTRY_DELETED)) || - - ((sub->action & ORTE_GPR_NOTIFY_VALUE_CHG) && - (ptr[i]->action & ORTE_GPR_REPLICA_ENTRY_CHG_TO)) || - - ((sub->action & ORTE_GPR_NOTIFY_VALUE_CHG) && - (ptr[i]->action & ORTE_GPR_REPLICA_ENTRY_CHG_FRM)) || - - ((sub->action & ORTE_GPR_NOTIFY_VALUE_CHG) && - (ptr[i]->action & ORTE_GPR_REPLICA_ENTRY_CHANGED))) - - && orte_gpr_replica_check_notify_matches(&addr_mode, sub, ptr[i])) { - - /* if the notify matched one of the subscription values, - * then the address mode will have - * been stored for us. we now need to send back - * the segment name and tokens from the container that is - * being addressed! 
- */ - /* Construct the base structure for returned data so it can be - * sent to the user, if required - */ - - if (ORTE_GPR_REPLICA_STRIPPED(addr_mode)) { - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&value, addr_mode, - NULL, 1, 0))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else { - if (ORTE_SUCCESS != (rc = orte_gpr_base_create_value(&value, addr_mode, - ptr[i]->seg->name, - 1, ptr[i]->cptr->num_itags))) { - ORTE_ERROR_LOG(rc); - return rc; - } - for (j=0; j < value->num_tokens; j++) { - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup( - &(value->tokens[j]), - ptr[i]->seg, - ptr[i]->cptr->itags[j]))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - } - - /* send back the recorded data */ - value->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t); - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup( - &((value->keyvals[0])->key), ptr[i]->seg, - ptr[i]->iptr->itag))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - (value->keyvals[0])->value = OBJ_NEW(orte_data_value_t); - if (NULL == value->keyvals[0]->value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(value); - return ORTE_ERR_OUT_OF_RESOURCE; - } - value->keyvals[0]->value->type = ptr[i]->iptr->value->type; - if (ORTE_SUCCESS != (rc = orte_dss.copy(&((value->keyvals[0]->value)->data), ptr[i]->iptr->value->data, ptr[i]->iptr->value->type))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(sub, value))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - /* register that this subscription is being processed - * to avoid potential infinite loops - */ - sub->processing = true; - } - } - } - -CLEANUP: - return rc; -} - - -bool orte_gpr_replica_check_notify_matches(orte_gpr_addr_mode_t *addr_mode, - orte_gpr_replica_subscription_t *sub, - orte_gpr_replica_action_taken_t *ptr) -{ - orte_gpr_replica_addr_mode_t tokmod; - orte_std_cntr_t i, j; - orte_gpr_replica_ivalue_t **ivals; - - OPAL_TRACE(3); - - /* we need to run 
through all of this subscription's defined - * values to see if any of them match the acted upon one. - */ - ivals = (orte_gpr_replica_ivalue_t**)(sub->values)->addr; - for (i=0, j=0; j < sub->num_values && - i < (sub->values)->size; i++) { - if (NULL != ivals[i]) { - j++; - /* first, check to see if the segments match */ - if (ivals[i]->seg != ptr->seg) { /* don't match - return false */ - continue; - } - - /* next, check to see if the containers match */ - tokmod = ORTE_GPR_REPLICA_TOKMODE(ivals[i]->addr_mode); - if (!orte_gpr_replica_check_itag_list(tokmod, - orte_value_array_get_size(&(ivals[i]->tokentags)), - ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->tokentags), orte_gpr_replica_itag_t), - (ptr->cptr)->num_itags, - (ptr->cptr)->itags)) { - /* not this container */ - continue; - } - /* next, check to see if this keyval was on the list */ - if (orte_gpr_replica_check_itag_list(ORTE_GPR_REPLICA_OR, - orte_value_array_get_size(&(ivals[i]->keytags)), - ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->keytags), orte_gpr_replica_itag_t), - 1, - &(ptr->iptr->itag))) { - /* keyval is on list - return the address mode */ - *addr_mode = ivals[i]->addr_mode; - return true; - } - } - } - - /* if we get here, then the acted upon value was - * nowhere on the subscription's defined values */ - return false; -} - - -int orte_gpr_replica_purge_subscriptions(orte_process_name_t *proc) -{ -#if 0 - orte_gpr_replica_trigger_t **trig; - orte_std_cntr_t i; - int rc; - - OPAL_TRACE(3); - - /* locate any notification events that have proc as the requestor - * and remove them - */ - trig = (orte_gpr_replica_triggers_t**)((orte_gpr_replica.triggers)->addr); - for (i=0; i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trig[i]) { - if (NULL == proc && NULL == trig[i]->requestor) { - if (ORTE_SUCCESS != (rc = orte_pointer_array_set_item(orte_gpr_replica.triggers, - trig[i]->index, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - OBJ_RELEASE(trig); - } else if (NULL != proc && NULL != 
trig[i]->requestor && - ORTE_EQUAL == orte_dss.compare(Oproc, trig[i]->requestor, ORTE_NAME)) { - if (ORTE_SUCCESS != (rc = orte_pointer_array_set_item(orte_gpr_replica.triggers, - trig[i]->index, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - OBJ_RELEASE(trig); - } - } - } -#endif - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/gpr_replica.h b/orte/mca/gpr/replica/gpr_replica.h deleted file mode 100644 index 507a141188..0000000000 --- a/orte/mca/gpr/replica/gpr_replica.h +++ /dev/null @@ -1,446 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -/** @file - */ - -#ifndef ORTE_GPR_REPLICA_H -#define ORTE_GPR_REPLICA_H - -#include "orte_config.h" - -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" - -#include "orte/class/orte_pointer_array.h" -#include "orte/class/orte_value_array.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/base/base.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * typedefs needed in replica component - */ - -typedef orte_std_cntr_t orte_gpr_replica_itag_t; -#define ORTE_GPR_REPLICA_ITAG_MAX ORTE_STD_CNTR_MAX - - -typedef uint8_t orte_gpr_replica_addr_mode_t; - -#define ORTE_GPR_REPLICA_AND (uint8_t)0x01 -#define ORTE_GPR_REPLICA_OR (uint8_t)0x02 -#define ORTE_GPR_REPLICA_XAND (uint8_t)0x04 -#define ORTE_GPR_REPLICA_XOR (uint8_t)0x08 -#define ORTE_GPR_REPLICA_NOT (uint8_t)0x10 - -#define ORTE_GPR_REPLICA_TOKMODE(n) 0x001f & n -#define ORTE_GPR_REPLICA_KEYMODE(n) ((0x1f00 & n) >> 8) & 0x001f -#define ORTE_GPR_REPLICA_STRIPPED(n) 0x2000 & n -#define ORTE_GPR_REPLICA_REMOVE_OVERWRITE(n) 0x7fff & n - -/* define a few action flags for trigger evaluation - */ -#define ORTE_GPR_REPLICA_NO_ACTION (int8_t) 0x00 -#define ORTE_GPR_REPLICA_ENTRY_ADDED (int8_t) 0x01 -#define ORTE_GPR_REPLICA_ENTRY_DELETED (int8_t) 0x02 -#define ORTE_GPR_REPLICA_ENTRY_CHANGED (int8_t) 0x04 -#define ORTE_GPR_REPLICA_ENTRY_CHG_TO (int8_t) 0x08 -#define ORTE_GPR_REPLICA_ENTRY_CHG_FRM (int8_t) 0x10 - - -typedef uint8_t orte_gpr_replica_action_t; - -/* - * Local subscription tracker for use by processes - * that are operating on the same node as the replica - */ -typedef struct { - opal_object_t super; /**< Allows this to be an object */ - orte_gpr_subscription_id_t id; /**< id of this subscription */ - orte_std_cntr_t index; /**< location of this subscription in array */ - char *name; - orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notificaiton */ - 
void *user_tag; /**< User-provided tag for callback function */ -} orte_gpr_replica_local_subscriber_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_local_subscriber_t); - - -/* - * Local trigger tracker for use by processes - * that are operating on the same node as the replica - */ -typedef struct { - opal_object_t super; /**< Allows this to be an object */ - orte_gpr_trigger_id_t id; /**< id of this trigger */ - orte_std_cntr_t index; /**< location of this trigger in array */ - char *name; - orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification */ - void *user_tag; /**< User-provided tag for callback function */ -} orte_gpr_replica_local_trigger_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_local_trigger_t); - - -typedef struct { - int debug; - int isolate; - opal_mutex_t mutex; - orte_std_cntr_t num_local_subs; - orte_pointer_array_t *local_subscriptions; - orte_std_cntr_t num_local_trigs; - orte_pointer_array_t *local_triggers; - orte_std_cntr_t num_srch_cptr; - orte_pointer_array_t *srch_cptr; - orte_std_cntr_t num_overwritten; - orte_pointer_array_t *overwritten; - orte_pointer_array_t *sub_ptrs; - orte_std_cntr_t num_srch_ival; - orte_pointer_array_t *srch_ival; - orte_std_cntr_t num_acted_upon; - orte_pointer_array_t *acted_upon; -} orte_gpr_replica_globals_t; - - -/* - * Registry "head" - * The registry "head" contains: - * - * (3) a managed array of pointers to segment objects. 
- * - * (4) a managed array of pointers to triggers acting on the entire registry - * - * (4) a managed array of pointers to subscriptions acting on the entire registry - * - */ -struct orte_gpr_replica_t { - orte_pointer_array_t *segments; /**< Managed array of pointers to segment objects */ - orte_std_cntr_t num_segs; - orte_pointer_array_t *triggers; /**< Managed array of pointers to triggers */ - orte_gpr_trigger_id_t num_trigs; - orte_pointer_array_t *subscriptions; /**< Managed array of pointers to subscriptions */ - orte_gpr_subscription_id_t num_subs; - bool processing_callbacks; - opal_list_t callbacks; /**< List of callbacks to be processed */ -}; -typedef struct orte_gpr_replica_t orte_gpr_replica_t; - -/** - * This structure will be used for all dictionary entries. Instead of using strcmp (& co.) - * we will compute a quick hash value for each string and store this value together with - * the string in the orte_pointer_array. The string have to be the last entry in the - * structure, in order to allow us to allocate it only once. - */ -struct orte_gpr_replica_dict_entry_t { - uint32_t hash_key; /**< the associated hash value */ - uint32_t length; /**< the length of the string computed once */ - char string[1]; /**< the real string (i.e. the structure will have a variable size) */ -}; -typedef struct orte_gpr_replica_dict_entry_t orte_gpr_replica_dict_entry_t; - -/** Registry segment definition. - * The registry is subdivided into segments, each defining a unique domain. The "universe" segment - * is automatically created to allow the exchange of information supporting universe-level functions. - * Similarly, a segment is automatically created for each MPI CommWorld within the universe - the - * name for that segment is stored in each CommWorld's ompi_system_info structure so program - * elements within that CommWorld can access it. The segment structure serves as the "head" of a linked - * list of registry elements for that segment. 
Each segment also holds its own token-itag dictionary - * to avoid naming conflicts between tokens from CommWorlds sharing a given universe. - */ -struct orte_gpr_replica_segment_t { - opal_object_t super; /**< Make this an object */ - char *name; /**< Name of the segment */ - orte_gpr_replica_itag_t itag; /**< itag of this segment */ - orte_gpr_replica_itag_t num_dict_entries; - orte_pointer_array_t *dict; /**< Managed array of dict structs */ - orte_std_cntr_t num_containers; - orte_pointer_array_t *containers; /**< Managed array of pointers to containers on this segment */ -}; -typedef struct orte_gpr_replica_segment_t orte_gpr_replica_segment_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_segment_t); - - -/** The core registry structure. - * Each segment of the registry contains an array of registry containers, each composed - * of: - * - * (1) An object structure that allows the structure to be treated with the OBJ - * memory management system - * - * (2) An array of itags that define the container - these are 1:1 correspondents with - * the character string tokens provided by caller - * - * (3) An array of indices into the trigger notifier array - each index points to - * a notifier whose trigger refers to this container. - * - * (4) An array of pointers to keyval objects that actually hold the data. - * - * At this time, no security is provided on an object-level basis. Thus, all requests for an - * object are automatically granted. This may be changed at some future time by adding an - * "authorization" linked list of ID's and their access rights to this structure. 
- */ -struct orte_gpr_replica_container_t { - opal_object_t super; /**< Make this an object */ - orte_std_cntr_t index; /**< Location in the pointer array */ - orte_gpr_replica_itag_t *itags; /**< Array of itags that define this container */ - orte_std_cntr_t num_itags; /**< Number of itags in array */ - orte_pointer_array_t *itagvals; /**< Array of itagval pointers */ - orte_std_cntr_t num_itagvals; /**< Number of itagvals in container */ - orte_value_array_t itaglist; /**< Array of itags from all itagvals - used for rapid search */ -}; -typedef struct orte_gpr_replica_container_t orte_gpr_replica_container_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_container_t); - - -/* The itag-value pair for storing data entries in the registry - */ -typedef struct { - opal_object_t super; /**< required for this to be an object */ - orte_std_cntr_t index; /**< index of this itagval on the container array */ - orte_gpr_replica_itag_t itag; /**< itag for this value's key */ - orte_data_value_t *value; /**< Actual stored value */ -} orte_gpr_replica_itagval_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_itagval_t); - -/* The equivalent of the value structure, only using internal - * itags for the tokens/keys and pointers to internal structures - */ -typedef struct { - opal_object_t super; /**< Makes this an object */ - orte_std_cntr_t index; - /* the segment upon which this data is located */ - orte_gpr_replica_segment_t *seg; - /* describe the data */ - orte_gpr_addr_mode_t addr_mode; /**< Tokens/keys addressing mode */ - orte_value_array_t tokentags; /**< Array of tokens defining which containers are affected */ - orte_value_array_t keytags; /**< Array of keys defining which key-value pairs are affected */ -} orte_gpr_replica_ivalue_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_ivalue_t); - - -typedef struct { - opal_object_t super; - orte_gpr_replica_segment_t *seg; - orte_gpr_replica_container_t *cptr; - orte_gpr_replica_itagval_t *iptr; - orte_gpr_replica_itagval_t 
trigger_level; -} orte_gpr_replica_counter_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_counter_t); - -typedef struct { - opal_object_t super; - /* index of this entry in requestor array */ - orte_std_cntr_t index; - /* process name of the recipient - set to NULL if local */ - orte_process_name_t *requestor; - /* idtag associated with this subscription */ - orte_gpr_subscription_id_t idtag; -} orte_gpr_replica_requestor_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_requestor_t); - -typedef struct { - opal_object_t super; /**< Makes this an object */ - /* index of this entry in subscription array */ - orte_std_cntr_t index; - /* idtag for the subscription - may be different than index since - * the data type can be different than orte_std_cntr_t - */ - orte_gpr_subscription_id_t idtag; - /* name of this subscription, if provided */ - char *name; - /* boolean indicating if this subscription is active or not */ - bool active; - /* boolean indicating that this subscription is already being - * processed - required to prevent infinite loops should a - * callback function modify the registry - */ - bool processing; - /* boolean indicating that this subscription - * should be removed after processing - * is completed - */ - bool cleanup; - /* action flags describing when the subscription should - * generate a notification message. 
This can be NULL if - * the subscription only operates in conjunction - * with a trigger - */ - orte_gpr_notify_action_t action; - /* Array of ivalues that describe the data to be - * returned when this subscription is "fired" - */ - orte_std_cntr_t num_values; - orte_pointer_array_t *values; - /* - * Array of requestors that are "attached" to this subscription - */ - orte_std_cntr_t num_requestors; - orte_pointer_array_t *requestors; -} orte_gpr_replica_subscription_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_subscription_t); - - -typedef struct { - opal_object_t super; - /* index of this entry in array */ - orte_std_cntr_t index; - /* process name of the requestor - set to NULL if local */ - orte_process_name_t *requestor; - /* requestor's id for this trigger */ - orte_gpr_trigger_id_t idtag; -} orte_gpr_replica_trigger_requestor_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_trigger_requestor_t); - - -struct orte_gpr_replica_trigger_t { - opal_object_t super; /**< Make this an object */ - /* name of this trigger, if provided */ - char *name; - /* index of this trigger in the triggers array */ - orte_std_cntr_t index; - /* trigger id on the local system */ - orte_gpr_trigger_id_t idtag; - /* array of requestors that have "attached" themselves to this trigger */ - orte_std_cntr_t num_attached; - orte_pointer_array_t *attached; - /* the "master" requestor - if someone asks to have all - * output routed through them, we record their info here - * so we can comply - */ - orte_gpr_replica_trigger_requestor_t *master; - /* the action that causes the trigger to be fired */ - orte_gpr_trigger_action_t action; - /* boolean indicating that this trigger is already being - * processed - required to prevent infinite loops should a - * callback function modify the registry - */ - bool processing; - /* flag that indicates this trigger is a one-shot, has fired and - * now should be cleaned up - */ - bool one_shot_fired; - /* pointers to the counters being monitored. 
This could - * be counters we are using ourselves, or could be counters being run by someone - * else. For those triggers that fire at a specified level (as opposed to - * comparing values in two or more counters), store the trigger level for - * each counter that we are monitoring until they reach a specified level. - */ - orte_std_cntr_t num_counters; - orte_pointer_array_t *counters; - /* a pointer to the subscriptions associated with this trigger. These - * describe the data that will be returned when the trigger fires, and to - * whom and where it goes. - */ - orte_std_cntr_t num_subscriptions; - orte_pointer_array_t *subscriptions; -}; -typedef struct orte_gpr_replica_trigger_t orte_gpr_replica_trigger_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_trigger_t); - - -/* - * Action taken object - used to track what action was taken against what - * registry object during the course of a registry request. For example, if - * a PUT modifies an existing registry entry, then we store a pointer to that - * entry and a flag indicating that it was modified. This info is required for - * processing notification subscriptions. - */ -typedef struct { - opal_object_t super; /**< Make this an object */ - orte_gpr_replica_action_t action; - orte_gpr_replica_segment_t *seg; - orte_gpr_replica_container_t *cptr; - orte_gpr_replica_itagval_t *iptr; -} orte_gpr_replica_action_taken_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_action_taken_t); - -/* - * Callback list objects - */ -struct orte_gpr_replica_callbacks_t { - opal_list_item_t item; - orte_process_name_t *requestor; - orte_gpr_notify_message_t *message; -}; -typedef struct orte_gpr_replica_callbacks_t orte_gpr_replica_callbacks_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_callbacks_t); - -/** List of replicas that hold a stored entry. - * Each entry can have an arbitrary number of replicas that hold a copy - * of the entry. The GPR requires that each entry be replicated in at least - * two locations. 
This structure is used to create a linked list of - * replicas for the entry. - * - * THIS IS NOT IMPLEMENTED YET - */ -struct orte_gpr_replica_list_t { - opal_list_item_t item; /**< Allows this item to be placed on a list */ - orte_process_name_t *replica; /**< Name of the replica */ -}; -typedef struct orte_gpr_replica_list_t orte_gpr_replica_list_t; - -OBJ_CLASS_DECLARATION(orte_gpr_replica_list_t); - -/** Write invalidate structure. - * The structure used to indicate that an entry has been updated somewhere else in the GPR. - * The structure contains a flag indicating that the locally stored copy of the entry - * is no longer valid, a time tag indicating the time of the last known modification - * of the entry within the global registry, and the replica holding the last known - * up-to-date version of the entry. - * - * THIS IS NOT IMPLEMENTED YET - */ -struct orte_gpr_replica_write_invalidate_t { - bool invalidate; - time_t last_mod; - orte_process_name_t *valid_replica; -}; -typedef struct orte_gpr_replica_write_invalidate_t orte_gpr_replica_write_invalidate_t; - - -/* - * globals needed within component - */ -extern orte_gpr_replica_t orte_gpr_replica; -extern orte_gpr_replica_globals_t orte_gpr_replica_globals; - -int orte_gpr_replica_ft_event(int state); - -ORTE_MODULE_DECLSPEC extern mca_gpr_base_component_t mca_gpr_replica_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/gpr/replica/gpr_replica_class_instances.h b/orte/mca/gpr/replica/gpr_replica_class_instances.h deleted file mode 100644 index aafebd71f8..0000000000 --- a/orte/mca/gpr/replica/gpr_replica_class_instances.h +++ /dev/null @@ -1,562 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -/** @file - */ - -#ifndef ORTE_GPR_REPLICA_CLASS_INSTANCES_H_ -#define ORTE_GPR_REPLICA_CLASS_INSTANCES_H_ - -#include "orte_config.h" - -#include "gpr_replica.h" - -/* - * CONSTRUCTORS, DESTRUCTORS, AND CLASS INSTANTIATIONS - * FOR GPR REPLICA CLASSES - */ - -/* LOCAL_SUBSCRIBER */ -static void orte_gpr_replica_local_subscriber_constructor(orte_gpr_replica_local_subscriber_t *ptr) -{ - ptr->name = NULL; - ptr->callback = NULL; - ptr->user_tag = NULL; -} - -static void orte_gpr_replica_local_subscriber_destructor(orte_gpr_replica_local_subscriber_t *ptr) -{ - if (NULL != ptr->name) free(ptr->name); -} - -OBJ_CLASS_INSTANCE( - orte_gpr_replica_local_subscriber_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_local_subscriber_constructor, /* constructor */ - orte_gpr_replica_local_subscriber_destructor); /* destructor */ - - -/* LOCAL_TRIGGER */ -static void orte_gpr_replica_local_trigger_constructor(orte_gpr_replica_local_trigger_t *ptr) -{ - ptr->name = NULL; - ptr->callback = NULL; - ptr->user_tag = NULL; -} - -static void orte_gpr_replica_local_trigger_destructor(orte_gpr_replica_local_trigger_t *ptr) -{ - if (NULL != ptr->name) free(ptr->name); -} - -/* define instance */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_local_trigger_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_local_trigger_constructor, /* constructor */ - orte_gpr_replica_local_trigger_destructor); /* destructor */ - - -/* SEGMENT */ -/* constructor - used to initialize state of segment instance */ -static void orte_gpr_replica_segment_construct(orte_gpr_replica_segment_t* seg) -{ - seg->name = NULL; - seg->itag 
= ORTE_GPR_REPLICA_ITAG_MAX; - - seg->num_dict_entries = 0; - orte_pointer_array_init(&(seg->dict), (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size); - - seg->num_containers = 0; - orte_pointer_array_init(&(seg->containers), (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size); - -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_segment_destructor(orte_gpr_replica_segment_t* seg) -{ - orte_std_cntr_t i, k; - orte_gpr_replica_dict_entry_t **dptr; - orte_gpr_replica_itag_t j; - orte_gpr_replica_container_t **cptr; - - if (NULL != seg->name) { - free(seg->name); - } - - if (NULL != seg->dict) { - dptr = (orte_gpr_replica_dict_entry_t**)(seg->dict->addr); - for (i=0, j=0; j < seg->num_dict_entries && - i < (seg->dict)->size; i++) { - if (NULL != dptr[i]) { - j++; - free(dptr[i]); - } - } - OBJ_RELEASE(seg->dict); - } - - if (NULL != seg->containers) { - cptr = (orte_gpr_replica_container_t**)((seg->containers)->addr); - for (i=0, k=0; k < seg->num_containers && - i < (seg->containers)->size; i++) { - if (NULL != cptr[i]) { - k++; - OBJ_RELEASE(cptr[i]); - } - } - OBJ_RELEASE(seg->containers); - } -} - -/* define instance of orte_gpr_replica_segment_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_segment_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_segment_construct, /* constructor */ - orte_gpr_replica_segment_destructor); /* destructor */ - - -/* CONTAINER */ -/* constructor - used to initialize state of registry container instance */ -static void orte_gpr_replica_container_construct(orte_gpr_replica_container_t* reg) -{ - reg->index = 0; - reg->itags = NULL; - reg->num_itags = 0; - - orte_pointer_array_init(&(reg->itagvals), (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - 
(orte_std_cntr_t)orte_gpr_array_block_size); - reg->num_itagvals = 0; - - OBJ_CONSTRUCT(&(reg->itaglist), orte_value_array_t); - orte_value_array_init(&(reg->itaglist), sizeof(orte_gpr_replica_itag_t)); - -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_container_destructor(orte_gpr_replica_container_t* reg) -{ - orte_gpr_replica_itagval_t **ptr; - orte_std_cntr_t i, k; - - if (NULL != reg->itags) { - free(reg->itags); - } - - if (NULL != reg->itagvals) { - ptr = (orte_gpr_replica_itagval_t**)((reg->itagvals)->addr); - for (i=0, k=0; k < reg->num_itagvals && - i < (reg->itagvals)->size; i++) { - if (NULL != ptr[i]) { - k++; - OBJ_RELEASE(ptr[i]); - } - } - OBJ_RELEASE(reg->itagvals); - } - - OBJ_DESTRUCT(&(reg->itaglist)); - -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_container_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_container_construct, /* constructor */ - orte_gpr_replica_container_destructor); /* destructor */ - - -/* ITAG-VALUE PAIR */ -/* constructor - used to initialize state of itagval instance */ -static void orte_gpr_replica_itagval_construct(orte_gpr_replica_itagval_t* ptr) -{ - ptr->index = 0; - ptr->itag = ORTE_GPR_REPLICA_ITAG_MAX; - ptr->value = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_itagval_destructor(orte_gpr_replica_itagval_t* ptr) -{ - if (NULL != ptr->value) OBJ_RELEASE(ptr->value); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_itagval_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_itagval_construct, /* constructor */ - orte_gpr_replica_itagval_destructor); /* destructor */ - - -/* IVALUE */ -/* constructor - used to initialize state of ivalue instance */ -static void orte_gpr_replica_ivalue_construct(orte_gpr_replica_ivalue_t* ptr) -{ - ptr->index = 0; - ptr->seg = NULL; - 
ptr->addr_mode = 0; - - OBJ_CONSTRUCT(&(ptr->tokentags), orte_value_array_t); - orte_value_array_init(&(ptr->tokentags), sizeof(orte_gpr_replica_itag_t)); - - OBJ_CONSTRUCT(&(ptr->keytags), orte_value_array_t); - orte_value_array_init(&(ptr->keytags), sizeof(orte_gpr_replica_itag_t)); - -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_ivalue_destructor(orte_gpr_replica_ivalue_t* ptr) -{ - OBJ_DESTRUCT(&(ptr->tokentags)); - OBJ_DESTRUCT(&(ptr->keytags)); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_ivalue_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_ivalue_construct, /* constructor */ - orte_gpr_replica_ivalue_destructor); /* destructor */ - - -/* COUNTERS */ -/* constructor - used to initialize state of counter instance */ -static void orte_gpr_replica_counter_construct(orte_gpr_replica_counter_t* cntr) -{ - cntr->seg = NULL; - cntr->cptr = NULL; - cntr->iptr = NULL; - OBJ_CONSTRUCT(&(cntr->trigger_level), orte_gpr_replica_itagval_t); -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_counter_destructor(orte_gpr_replica_counter_t* cntr) -{ - OBJ_DESTRUCT(&(cntr->trigger_level)); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_counter_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_counter_construct, /* constructor */ - orte_gpr_replica_counter_destructor); /* destructor */ - - -/* REQUESTOR */ -/* constructor - used to initialize state of requestor instance */ -static void orte_gpr_replica_requestor_construct(orte_gpr_replica_requestor_t* ptr) -{ - ptr->index = 0; - ptr->requestor = NULL; - ptr->idtag = 0; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_requestor_destructor(orte_gpr_replica_requestor_t* ptr) -{ - if (NULL != ptr->requestor) free(ptr->requestor); -} - -/* define 
instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_requestor_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_requestor_construct, /* constructor */ - orte_gpr_replica_requestor_destructor); /* destructor */ - - -/* SUBSCRIPTION */ -/* constructor - used to initialize state of subscription instance */ -static void orte_gpr_replica_subscription_construct(orte_gpr_replica_subscription_t* sub) -{ - sub->index = 0; - sub->idtag = ORTE_GPR_SUBSCRIPTION_ID_MAX; - sub->name = NULL; - sub->active = false; - sub->processing = false; - sub->cleanup = false; - sub->action = ORTE_GPR_REPLICA_NO_ACTION; - - sub->num_values = 0; - orte_pointer_array_init(&(sub->values), (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size); - - sub->num_requestors = 0; - orte_pointer_array_init(&(sub->requestors), (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size); -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_subscription_destructor(orte_gpr_replica_subscription_t* sub) -{ - orte_gpr_replica_requestor_t **ptr; - orte_gpr_replica_ivalue_t **ivals; - orte_std_cntr_t i, k; - - if (NULL != sub->name) free(sub->name); - - if (NULL != sub->requestors) { - ptr = (orte_gpr_replica_requestor_t**)((sub->requestors)->addr); - for (i=0, k=0; k < sub->num_requestors && - i < (sub->requestors)->size; i++) { - if (NULL != ptr[i]) { - k++; - OBJ_RELEASE(ptr[i]); - } - } - OBJ_RELEASE(sub->requestors); - } - - if (NULL != sub->values) { - ivals = (orte_gpr_replica_ivalue_t**)((sub->values)->addr); - for (i=0, k=0; k < sub->num_values && - i < (sub->values)->size; i++) { - if (NULL != ivals[i]) { - k++; - OBJ_RELEASE(ivals[i]); - } - } - OBJ_RELEASE(sub->values); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - 
orte_gpr_replica_subscription_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_subscription_construct, /* constructor */ - orte_gpr_replica_subscription_destructor); /* destructor */ - - -/* TRIGGER REQUESTOR */ -/* constructor - used to initialize state of instance */ -static void orte_gpr_replica_trigger_requestor_construct(orte_gpr_replica_trigger_requestor_t* ptr) -{ - ptr->index = 0; - ptr->idtag = ORTE_GPR_TRIGGER_ID_MAX; - ptr->requestor = NULL; - ptr->idtag = 0; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_trigger_requestor_destructor(orte_gpr_replica_trigger_requestor_t* ptr) -{ - if (NULL != ptr->requestor) free(ptr->requestor); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_trigger_requestor_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_trigger_requestor_construct, /* constructor */ - orte_gpr_replica_trigger_requestor_destructor); /* destructor */ - - -/* TRIGGER */ -/* constructor - used to initialize state of trigger instance */ -static void orte_gpr_replica_trigger_construct(orte_gpr_replica_trigger_t* trig) -{ - trig->name = NULL; - trig->index = 0; - trig->idtag = ORTE_GPR_TRIGGER_ID_MAX; - - trig->num_attached = 0; - orte_pointer_array_init(&(trig->attached), (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size); - - trig->master = NULL;; - - trig->action = ORTE_GPR_REPLICA_NO_ACTION; - trig->one_shot_fired = false; - trig->processing = false; - - trig->num_counters = 0; - orte_pointer_array_init(&(trig->counters), (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size); - - trig->num_subscriptions = 0; - orte_pointer_array_init(&(trig->subscriptions), (orte_std_cntr_t)orte_gpr_array_block_size, - 
(orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size); - -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_trigger_destructor(orte_gpr_replica_trigger_t* trig) -{ - orte_std_cntr_t i, cnt; - orte_gpr_replica_counter_t **cntrs; - orte_gpr_replica_trigger_requestor_t **att; - - if (NULL != trig->name) { - free(trig->name); - } - - /* must go through the array of atached and release - * the memory for each one prior to releasing the array - */ - if (NULL != trig->attached) { - att = (orte_gpr_replica_trigger_requestor_t**)((trig->attached)->addr); - cnt = 0; - for (i=0; cnt < trig->num_attached && i < (trig->attached)->size; i++) { - if (NULL != att[i]) { - cnt++; - OBJ_RELEASE(att[i]); - } - } - OBJ_RELEASE(trig->attached); - } - - /* must go through the array of counters and release - * the memory for each one prior to releasing the array - */ - if (NULL != trig->counters) { - cntrs = (orte_gpr_replica_counter_t**)((trig->counters)->addr); - cnt = 0; - for (i=0; cnt < trig->num_counters && i < (trig->counters)->size; i++) { - if (NULL != cntrs[i]) { - cnt++; - OBJ_RELEASE(cntrs[i]); - } - } - OBJ_RELEASE(trig->counters); - } - - /* the array of subscriptions is separately maintained, so we - * do NOT release the subscription memory here. 
We only release - * the array of pointers we were using to reference into the - * subscription array - */ - if (NULL != trig->subscriptions) { - OBJ_RELEASE(trig->subscriptions); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_trigger_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_trigger_construct, /* constructor */ - orte_gpr_replica_trigger_destructor); /* destructor */ - - -/* ACTION_TAKEN */ -/* constructor - used to initialize state of action_take instance */ -static void orte_gpr_replica_action_taken_construct(orte_gpr_replica_action_taken_t* ptr) -{ - ptr->action = ORTE_GPR_REPLICA_NO_ACTION; - ptr->seg = NULL; - ptr->cptr = NULL; - ptr->iptr = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_action_taken_destructor(orte_gpr_replica_action_taken_t* ptr) -{ - /* since we did a "RETAIN" on the objects pointed to by this object, - * we need to "RELEASE" them to indicate we are done with them - */ - if (NULL != ptr->seg) OBJ_RELEASE(ptr->seg); - if (NULL != ptr->cptr) OBJ_RELEASE(ptr->cptr); - if (NULL != ptr->iptr) OBJ_RELEASE(ptr->iptr); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_action_taken_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_gpr_replica_action_taken_construct, /* constructor */ - orte_gpr_replica_action_taken_destructor); /* destructor */ - - -/* CALLBACKS */ -/* constructor - used to initialize state of callback list instance */ -static void orte_gpr_replica_callbacks_construct(orte_gpr_replica_callbacks_t* cb) -{ - cb->message = NULL; - cb->requestor = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_callbacks_destructor(orte_gpr_replica_callbacks_t* cb) -{ - if (NULL != cb->message) OBJ_RELEASE(cb->message); - - if (NULL != cb->requestor) { - free(cb->requestor); - } - -} - -/* define instance of 
opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_callbacks_t, /* type name */ - opal_list_item_t, /* parent "class" name */ - orte_gpr_replica_callbacks_construct, /* constructor */ - orte_gpr_replica_callbacks_destructor); /* destructor */ - - -/* REPLICA LIST - NOT IMPLEMENTED YET! */ -/* constructor - used to initialize state of replica list instance */ -static void orte_gpr_replica_list_construct(orte_gpr_replica_list_t* replica) -{ - replica->replica = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_gpr_replica_list_destructor(orte_gpr_replica_list_t* replica) -{ - if (NULL != replica->replica) { - free(replica->replica); - replica->replica = NULL; - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_list_t, /* type name */ - opal_list_item_t, /* parent "class" name */ - orte_gpr_replica_list_construct, /* constructor */ - orte_gpr_replica_list_destructor); /* destructor */ - - -/* WRITE INVALIDATE - NOT IMPLEMENTED YET! */ -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_gpr_replica_write_invalidate_t, /* type name */ - opal_list_item_t, /* parent "class" name */ - NULL, /* constructor */ - NULL); /* destructor */ - - -#endif /* _GPR_REPLICA_CLASS_INSTANCES_H_ */ - diff --git a/orte/mca/gpr/replica/gpr_replica_component.c b/orte/mca/gpr/replica/gpr_replica_component.c deleted file mode 100644 index 8cb4a30879..0000000000 --- a/orte/mca/gpr/replica/gpr_replica_component.c +++ /dev/null @@ -1,452 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI General Purpose Registry - Replica component - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/util/output.h" -#include "opal/util/trace.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/rml/rml.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/gpr/replica/gpr_replica.h" -#include "orte/mca/gpr/replica/api_layer/gpr_replica_api.h" -#include "orte/mca/gpr/replica/communications/gpr_replica_comm.h" - -/* - * Static functions. - */ -static orte_gpr_base_module_t* -orte_gpr_replica_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority); -static int orte_gpr_replica_finalize(void); -static int orte_gpr_replica_open(void); -static int orte_gpr_replica_close(void); -static int orte_gpr_replica_module_init(void); - -/* - * Struct of function pointers that need to be initialized - */ -mca_gpr_base_component_t mca_gpr_replica_component = { - { - MCA_GPR_BASE_VERSION_1_0_0, - - "replica", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_gpr_replica_open, /* module open */ - orte_gpr_replica_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_gpr_replica_init, /* module init */ - orte_gpr_replica_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static orte_gpr_base_module_t orte_gpr_replica_module = { - /* INIT */ - orte_gpr_replica_module_init, - /* BLOCKING OPERATIONS */ - orte_gpr_replica_get, - orte_gpr_replica_get_conditional, - orte_gpr_replica_put, - orte_gpr_base_put_1, - orte_gpr_base_put_N, - orte_gpr_replica_delete_entries, - 
orte_gpr_replica_delete_segment, - orte_gpr_replica_index, - /* NON-BLOCKING OPERATIONS */ - orte_gpr_replica_get_nb, - orte_gpr_replica_put_nb, - orte_gpr_replica_delete_entries_nb, - orte_gpr_replica_delete_segment_nb, - orte_gpr_replica_index_nb, - /* GENERAL OPERATIONS */ - orte_gpr_base_create_value, - orte_gpr_base_create_keyval, - orte_gpr_replica_preallocate_segment, - orte_gpr_replica_get_number_entries, - orte_gpr_replica_deliver_notify_msg, - /* ARITHMETIC OPERATIONS */ - orte_gpr_replica_arith, - orte_gpr_replica_increment_value, - orte_gpr_replica_decrement_value, - /* SUBSCRIBE OPERATIONS */ - orte_gpr_replica_subscribe, - orte_gpr_base_subscribe_1, - orte_gpr_base_subscribe_N, - orte_gpr_base_define_trigger, - orte_gpr_base_define_trigger_level, - orte_gpr_replica_unsubscribe, - orte_gpr_replica_cancel_trigger, - /* COMPOUND COMMANDS */ - orte_gpr_replica_begin_compound_cmd, - orte_gpr_replica_stop_compound_cmd, - orte_gpr_replica_exec_compound_cmd, - orte_gpr_replica_process_compound_cmd, - /* DIAGNOSTIC OPERATIONS */ - orte_gpr_replica_dump_all, - orte_gpr_replica_dump_segments, - orte_gpr_replica_dump_triggers, - orte_gpr_replica_dump_subscriptions, - orte_gpr_replica_dump_a_trigger, - orte_gpr_replica_dump_a_subscription, - orte_gpr_replica_dump_local_triggers, - orte_gpr_replica_dump_local_subscriptions, - orte_gpr_replica_dump_callbacks, - orte_gpr_replica_dump_notify_msg, - orte_gpr_replica_dump_notify_data, - orte_gpr_replica_dump_value, - orte_gpr_replica_dump_segment_size, - /* CLEANUP OPERATIONS */ - orte_gpr_replica_cleanup_job, - orte_gpr_replica_cleanup_proc, - orte_gpr_replica_ft_event -}; - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - - -/* - * globals needed within replica component - */ -orte_gpr_replica_t orte_gpr_replica; - -orte_gpr_replica_globals_t orte_gpr_replica_globals; - -/* instantiate the classes */ -#include "orte/mca/gpr/replica/gpr_replica_class_instances.h" 
- -static int orte_gpr_replica_open(void) -{ - int id, tmp; - - OPAL_TRACE(5); - - id = mca_base_param_register_int("gpr", "replica", "debug", NULL, 0); - mca_base_param_lookup_int(id, &tmp); - if (tmp) { - orte_gpr_replica_globals.debug = true; - } else { - orte_gpr_replica_globals.debug = false; - } - - id = mca_base_param_register_int("gpr", "replica", "isolate", NULL, 0); - mca_base_param_lookup_int(id, &tmp); - if (tmp) { - orte_gpr_replica_globals.isolate = true; - } else { - orte_gpr_replica_globals.isolate = false; - } - - return ORTE_SUCCESS; -} - -/* - * close function - */ -static int orte_gpr_replica_close(void) -{ - OPAL_TRACE(5); - - return ORTE_SUCCESS; -} - -static orte_gpr_base_module_t* -orte_gpr_replica_init(bool *allow_multi_user_threads, bool *have_hidden_threads, - int *priority) -{ - int rc; - - OPAL_TRACE(5); - - /* If we are to host a replica, then we want to be selected, so do all the - setup and return the module */ - - if (NULL == orte_process_info.gpr_replica_uri) { - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other ns components). If - we're not the seed, then we don't want to be selected, so - return NULL. 
*/ - - *priority = 50; - - /* We allow multi user threads but don't have any hidden threads */ - - *allow_multi_user_threads = true; - *have_hidden_threads = false; - - /* setup the thread locks and condition variables */ - OBJ_CONSTRUCT(&orte_gpr_replica_globals.mutex, opal_mutex_t); - - /* initialize the registry head */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica.segments), - (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_gpr_replica.num_segs = 0; - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica.triggers), - (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_gpr_replica.num_trigs = 0; - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica.subscriptions), - (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_gpr_replica.num_subs = 0; - - /* initialize the callback list head */ - OBJ_CONSTRUCT(&orte_gpr_replica.callbacks, opal_list_t); - orte_gpr_replica.processing_callbacks = false; - - /* initialize the local subscription and trigger trackers */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_init( - &(orte_gpr_replica_globals.local_subscriptions), - (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - (orte_std_cntr_t)orte_gpr_array_block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_gpr_replica_globals.num_local_subs = 0; - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init( - &(orte_gpr_replica_globals.local_triggers), - (orte_std_cntr_t)orte_gpr_array_block_size, - (orte_std_cntr_t)orte_gpr_array_max_size, - 
(orte_std_cntr_t)orte_gpr_array_block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_gpr_replica_globals.num_local_trigs = 0; - - /* initialize the search arrays for temporarily storing search results */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.sub_ptrs), - 100, (orte_std_cntr_t)orte_gpr_array_max_size, 100))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.srch_cptr), - 100, (orte_std_cntr_t)orte_gpr_array_max_size, 100))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_gpr_replica_globals.num_srch_cptr = 0; - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.overwritten), - 20, (orte_std_cntr_t)orte_gpr_array_max_size, 20))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_gpr_replica_globals.num_overwritten = 0; - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.srch_ival), - 100, (orte_std_cntr_t)orte_gpr_array_max_size, 100))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_gpr_replica_globals.num_srch_ival = 0; - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.acted_upon), - 100, (orte_std_cntr_t)orte_gpr_array_max_size, 100))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_gpr_replica_globals.num_acted_upon = 0; - - if (orte_gpr_replica_globals.debug) { - opal_output(0, "nb receive setup"); - } - - /* Return the module */ - - initialized = true; - return &orte_gpr_replica_module; - } - return NULL; -} - -static int orte_gpr_replica_module_init(void) -{ - OPAL_TRACE(5); - - /* issue the non-blocking receive */ - if (!orte_gpr_replica_globals.isolate) { - int rc = orte_rml.recv_buffer_nb( - ORTE_NAME_WILDCARD, ORTE_RML_TAG_GPR, ORTE_RML_PERSISTENT, orte_gpr_replica_recv, NULL); - if(rc < 0) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - return ORTE_SUCCESS; -} - - -/* - * finalize routine - */ -static int orte_gpr_replica_finalize(void) -{ - 
orte_std_cntr_t i; - orte_gpr_subscription_id_t j; - orte_gpr_trigger_id_t k; - orte_gpr_replica_segment_t** seg; - orte_gpr_replica_trigger_t** trig; - orte_gpr_replica_subscription_t** subs; - orte_gpr_replica_callbacks_t* cb; - orte_gpr_replica_local_subscriber_t **lsubs; - orte_gpr_replica_local_trigger_t **ltrigs; - - OPAL_TRACE(5); - - /* destruct the thread lock */ - OBJ_DESTRUCT(&orte_gpr_replica_globals.mutex); - - if (NULL != orte_gpr_replica.segments) { - seg = (orte_gpr_replica_segment_t**)(orte_gpr_replica.segments)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_segs && - i < (orte_gpr_replica.segments)->size; i++) { - if (NULL != seg[i]) { - j++; - OBJ_RELEASE(seg[i]); - } - } - OBJ_RELEASE(orte_gpr_replica.segments); - } - - if (NULL != orte_gpr_replica.triggers) { - trig = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_trigs && - i < (orte_gpr_replica.triggers)->size; i++) { - if (NULL != trig[i]) { - j++; - OBJ_RELEASE(trig[i]); - } - } - OBJ_RELEASE(orte_gpr_replica.triggers); - } - - if (NULL != orte_gpr_replica.subscriptions) { - subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr; - for (i=0, j=0; j < orte_gpr_replica.num_subs && - i < (orte_gpr_replica.subscriptions)->size; i++) { - if (NULL != subs[i]) { - j++; - OBJ_RELEASE(subs[i]); - } - } - OBJ_RELEASE(orte_gpr_replica.subscriptions); - } - - while (NULL != (cb = (orte_gpr_replica_callbacks_t*)opal_list_remove_first(&orte_gpr_replica.callbacks))) { - OBJ_RELEASE(cb); - } - OBJ_DESTRUCT(&orte_gpr_replica.callbacks); - - - /* clear the local subscriptions and triggers */ - if (NULL != orte_gpr_replica_globals.local_subscriptions) { - lsubs = (orte_gpr_replica_local_subscriber_t**)(orte_gpr_replica_globals.local_subscriptions)->addr; - for (i=0, k=0; k < orte_gpr_replica_globals.num_local_subs && - i < (orte_gpr_replica_globals.local_subscriptions)->size; i++) { - if (NULL != lsubs[i]) { - k++; - 
OBJ_RELEASE(lsubs[i]); - } - } - OBJ_RELEASE(orte_gpr_replica_globals.local_subscriptions); - } - - if (NULL != orte_gpr_replica_globals.local_triggers) { - ltrigs = (orte_gpr_replica_local_trigger_t**)(orte_gpr_replica_globals.local_triggers)->addr; - for (i=0, j=0; j < orte_gpr_replica_globals.num_local_trigs && - i < (orte_gpr_replica_globals.local_triggers)->size; i++) { - if (NULL != ltrigs[i]) { - j++; - OBJ_RELEASE(ltrigs[i]); - } - } - OBJ_RELEASE(orte_gpr_replica_globals.local_triggers); - } - - /* clean up the globals */ - - if (NULL != orte_gpr_replica_globals.srch_cptr) { - OBJ_RELEASE(orte_gpr_replica_globals.srch_cptr); - } - - if (NULL != orte_gpr_replica_globals.overwritten) { - OBJ_RELEASE(orte_gpr_replica_globals.overwritten); - } - - if (NULL != orte_gpr_replica_globals.sub_ptrs) { - OBJ_RELEASE(orte_gpr_replica_globals.sub_ptrs); - } - - if (NULL != orte_gpr_replica_globals.srch_ival) { - OBJ_RELEASE(orte_gpr_replica_globals.srch_ival); - } - - if (NULL != orte_gpr_replica_globals.acted_upon) { - OBJ_RELEASE(orte_gpr_replica_globals.acted_upon); - } - - /* All done */ - if (orte_gpr_replica_globals.isolate) { - return ORTE_SUCCESS; - } - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_GPR); - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/gpr/replica/transition_layer/gpr_replica_dict_tl.c b/orte/mca/gpr/replica/transition_layer/gpr_replica_dict_tl.c deleted file mode 100644 index 73bb657a6e..0000000000 --- a/orte/mca/gpr/replica/transition_layer/gpr_replica_dict_tl.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - support functions. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/hash_string.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/class/orte_pointer_array.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/gpr/replica/gpr_replica.h" -#include "orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h" -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" - -int -orte_gpr_replica_create_itag(orte_gpr_replica_itag_t *itag, - orte_gpr_replica_segment_t *seg, char *name) -{ - orte_gpr_replica_dict_entry_t* new_dict; - orte_std_cntr_t index; - uint32_t len, hash_key; - int rc; - - OPAL_TRACE(3); - - /* default to illegal value */ - *itag = ORTE_GPR_REPLICA_ITAG_MAX; - - /* if name or seg is NULL, error */ - if (NULL == name || NULL == seg) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - if( ORTE_ERR_NOT_FOUND != (rc = orte_gpr_replica_dict_lookup(itag, seg, name))) { - return rc; - } - - OPAL_HASH_STRLEN( name, hash_key, len ); - - /* okay, name is unique - create dictionary entry */ - new_dict = (orte_gpr_replica_dict_entry_t*)malloc( sizeof(orte_gpr_replica_dict_entry_t) + len ); - new_dict->hash_key = hash_key; - new_dict->length = len; - strncpy( new_dict->string, name, len ); - new_dict->string[len] = '\0'; - - if (0 > orte_pointer_array_add(&index, seg->dict, (void*)new_dict)) { - free(new_dict); - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if ((orte_gpr_replica_itag_t)index < ORTE_GPR_REPLICA_ITAG_MAX) { - *itag = (orte_gpr_replica_itag_t)index; - (seg->num_dict_entries)++; - return ORTE_SUCCESS; - } - - /* otherwise, the itag violates the max value */ - free(new_dict); - /* remove 
itag from segment dictionary */ - orte_pointer_array_set_item(seg->dict, (orte_std_cntr_t)index, NULL); - - return ORTE_ERR_OUT_OF_RESOURCE; -} - - -int orte_gpr_replica_delete_itag(orte_gpr_replica_segment_t *seg, char *name) -{ - orte_gpr_replica_dict_entry_t **ptr; - orte_gpr_replica_itag_t itag; - int rc; - - OPAL_TRACE(3); - - /* check for errors */ - if (NULL == name || NULL == seg) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* find dictionary element to delete */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_lookup(&itag, seg, name))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* found name in dictionary */ - /* need to search this segment's registry to find all instances - * that name & delete them - */ - if (ORTE_SUCCESS != (rc = orte_gpr_replica_purge_itag(seg, itag))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* free the dictionary element data */ - ptr = (orte_gpr_replica_dict_entry_t**)(seg->dict->addr); - if (NULL == ptr[itag]) { /* dict element no longer valid */ - return ORTE_ERR_NOT_FOUND; - } - free(ptr[itag]); - - /* remove itag from segment dictionary */ - orte_pointer_array_set_item(seg->dict, (orte_std_cntr_t)itag, NULL); - - /* decrease the dict counter */ - (seg->num_dict_entries)--; - - return ORTE_SUCCESS; -} - - -int -orte_gpr_replica_dict_lookup(orte_gpr_replica_itag_t *itag, - orte_gpr_replica_segment_t *seg, char *name) -{ - orte_gpr_replica_dict_entry_t **ptr; - orte_std_cntr_t i; - orte_gpr_replica_itag_t j; - uint32_t hash_key, len; - - OPAL_TRACE(3); - - /* initialize to illegal value */ - *itag = ORTE_GPR_REPLICA_ITAG_MAX; - - /* protect against error */ - if (NULL == seg) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - if (NULL == name) { /* just want segment token-itag pair */ - *itag = seg->itag; - return ORTE_SUCCESS; - } - - OPAL_HASH_STRLEN( name, hash_key, len ); - - ptr = (orte_gpr_replica_dict_entry_t**)(seg->dict)->addr; - for (i=0, j=0; j < 
seg->num_dict_entries && - i < (seg->dict)->size; i++) { - if (NULL == ptr[i]) continue; - j++; - if( (len == ptr[i]->length) && (hash_key == ptr[i]->hash_key) - && (0 == strncmp(ptr[i]->string, name, len)) ) { - /* already present */ - if (i < ORTE_GPR_REPLICA_ITAG_MAX) { - *itag = (orte_gpr_replica_itag_t)i; - return ORTE_SUCCESS; - } - /* otherwise, the itag violates the max value */ - return ORTE_ERR_BAD_PARAM; - } - } - - return ORTE_ERR_NOT_FOUND; /* couldn't find the specified entry */ -} - - -int orte_gpr_replica_dict_reverse_lookup(char **name, - orte_gpr_replica_segment_t *seg, orte_gpr_replica_itag_t itag) -{ - orte_gpr_replica_dict_entry_t **ptr; - orte_gpr_replica_segment_t **segptr; - - OPAL_TRACE(3); - - /* initialize to nothing */ - *name = NULL; - - /* protect against error (shouldn't happen) */ - if (ORTE_GPR_REPLICA_ITAG_MAX == itag) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - if (NULL == seg) { - /* return the segment name - * note that itag is the index of the segment in that array - */ - segptr = (orte_gpr_replica_segment_t**)(orte_gpr_replica.segments->addr); - if (NULL == segptr[itag]) { /* this segment is no longer alive */ - return ORTE_ERR_NOT_FOUND; - } - *name = strdup(segptr[itag]->name); - return ORTE_SUCCESS; - } - - /* seg is provided - find the matching token for this itag - * note again that itag is the index into this segment's - * dictionary array - */ - ptr = (orte_gpr_replica_dict_entry_t**)((seg->dict)->addr); - if (NULL == ptr[itag]) { /* this entry is no longer valid! 
*/ - return ORTE_ERR_NOT_FOUND; - } - *name = strdup(ptr[itag]->string); - - return ORTE_SUCCESS; -} - -int -orte_gpr_replica_get_itag_list( orte_gpr_replica_itag_t **itaglist, - orte_gpr_replica_segment_t *seg, char **names, - orte_std_cntr_t *num_names ) -{ - char **namptr; - int rc; - orte_std_cntr_t i; - - OPAL_TRACE(3); - - *itaglist = NULL; - - /* check for errors */ - if (NULL == seg) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* check for wild-card case */ - if (NULL == names) { - return ORTE_SUCCESS; - } - - if (0 >= (*num_names)) { /* NULL-terminated list - count them */ - *num_names = 0; - namptr = names; - while (NULL != *namptr) { - *num_names = (*num_names) + 1; - namptr++; - } - } - - *itaglist = (orte_gpr_replica_itag_t*)malloc((*num_names)*sizeof(orte_gpr_replica_itag_t)); - if (NULL == *itaglist) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - namptr = names; - - for( i = 0; i < (*num_names); i++ ) { /* traverse array of names - ignore any NULL's */ - if (NULL == names[i]) continue; - if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_itag(&((*itaglist)[i]), seg, names[i]))) { - ORTE_ERROR_LOG(rc); - free(*itaglist); - *itaglist = NULL; - return rc; - } - } - return ORTE_SUCCESS; -} - diff --git a/orte/mca/gpr/replica/transition_layer/gpr_replica_segment_tl.c b/orte/mca/gpr/replica/transition_layer/gpr_replica_segment_tl.c deleted file mode 100644 index b53408415f..0000000000 --- a/orte/mca/gpr/replica/transition_layer/gpr_replica_segment_tl.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. 
All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - support functions. - * - */ - -/* - * includes - */ - -#include "orte_config.h" - -#include "opal/util/trace.h" - -#include "orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h" - -int orte_gpr_replica_find_seg(orte_gpr_replica_segment_t **seg, - bool create, char *segment) -{ - int rc=ORTE_SUCCESS; - orte_std_cntr_t i, cntri; - orte_gpr_replica_segment_t **ptr; - - OPAL_TRACE(3); - - /* initialize to nothing */ - *seg = NULL; - - if (NULL == segment) { - /* this is an allowed value - the index function, for example, - * will pass this to us if we want the index of the global level - * of the registry (i.e., the index of segment names). Just return - * NULL and we'll be okay - */ - return ORTE_SUCCESS; - } - - /* search the registry segments to find which one is being referenced */ - ptr = (orte_gpr_replica_segment_t**)(orte_gpr_replica.segments->addr); - cntri = 0; - for (i=0; cntri < orte_gpr_replica.num_segs && - i < (orte_gpr_replica.segments)->size; i++) { - if (NULL != ptr[i]) { - cntri++; - if (0 == strcmp(segment, ptr[i]->name)) { - *seg = ptr[i]; - return ORTE_SUCCESS; - } - } - } - - if (!create) { - /* couldn't find it and don't want it created - just return NULL */ - return ORTE_ERR_BAD_PARAM; - } - - /* add the segment to the registry */ - *seg = OBJ_NEW(orte_gpr_replica_segment_t); - (*seg)->name = strdup(segment); - if (0 > orte_pointer_array_add(&i, orte_gpr_replica.segments, (void*)(*seg))) { - OBJ_RELEASE(*seg); - return rc; - } - (*seg)->itag = i; - (orte_gpr_replica.num_segs)++; - - return ORTE_SUCCESS; -} diff --git a/orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h b/orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h deleted file mode 100644 index 83582cabfe..0000000000 --- 
a/orte/mca/gpr/replica/transition_layer/gpr_replica_tl.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - support functions. - * - */ - -#ifndef MCA_GPR_REPLICA_TL_H_ -#define MCA_GPR_REPLICA_TL_H_ - -#include "orte_config.h" - -#include "orte/mca/gpr/replica/gpr_replica.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * DICTIONARY OPERATIONS - */ - -/** Add a name to a segment's dictionary. - * This function allows the addition of a new definition to - * the registry's dictionaries. The specified string is assigned an integer - * value within the specified segment, and the entry is added to the segment's token-itag - * dictionary. - * - * @param *seg Pointer to the segment of the registry. - * @param name Pointer to a character string containing the string to be defined. - * - * @param *itag (OUT) orte_gpr_replica_itag_t value of corresponding name. - * - * @retval ORTE_SUCCESS Operation successful - * @retval ORTE_ERROR(s) Indicates that the dictionary is full or some other error. - */ -int orte_gpr_replica_create_itag(orte_gpr_replica_itag_t *itag, - orte_gpr_replica_segment_t *seg, char *name); -/** - * Typedef of the orte_gpr_replica_create_itag() function signature so - * that it can be invoked via a function pointer for a unit test. 
- */ -typedef int (*orte_gpr_replica_create_itag_fn_t) - (orte_gpr_replica_itag_t *itag, - orte_gpr_replica_segment_t *seg, char *name); - -/** Delete a name from a segment's dictionary. - * This function allows the removal of a definition from the - * registry's dictionaries. This should be used with caution! Deletion of - * a name causes the registry to search through all entries within that segment - * for entries that include the specified name in their description. The reference - * is subsequently removed, and any entry for which this was the SOLE descriptor will also - * be removed from the registry! - * - * @param *seg Pointer to the segment of the registry. - * @param name Pointer to a character string containing the name to be deleted. - * - * @retval ORTE_SUCCESS Indicating that the operation was successful. - * @retval ORTE_ERROR Indicates that the operation failed - most likely caused by specifying - * a name that did not exist within the specified segment, or a non-existent segment. - */ -int orte_gpr_replica_delete_itag(orte_gpr_replica_segment_t *seg, char *name); - -/** - * Typedef of the orte_gpr_replica_delete_itag() function signature so - * that it can be invoked via a function pointer for a unit test. - */ -typedef int (*orte_gpr_replica_delete_itag_fn_t) - (orte_gpr_replica_segment_t *seg, char *name); - - -int orte_gpr_replica_dict_lookup(orte_gpr_replica_itag_t *itag, - orte_gpr_replica_segment_t *seg, char *name); -/** - * Typedef of the orte_gpr_replica_dict_lookup() function signature so - * that it can be invoked via a function pointer for a unit test. 
- */ -typedef int (*orte_gpr_replica_dict_lookup_fn_t) - (orte_gpr_replica_itag_t *itag, - orte_gpr_replica_segment_t *seg, char *name); - - -int orte_gpr_replica_dict_reverse_lookup(char **name, - orte_gpr_replica_segment_t *seg, orte_gpr_replica_itag_t itag); -/** - * Typedef of the orte_gpr_replica_dict_reverse_lookup() function - * signature so that it can be invoked via a function pointer for a - * unit test. - */ -typedef int (*orte_gpr_replica_dict_reverse_lookup_fn_t) - (char **name, - orte_gpr_replica_segment_t *seg, orte_gpr_replica_itag_t itag); - - -/* - * Get a list of itags for a list of string names - * Given a list of string names, this function will look them up in the specified - * segment's dictionary to see if they exist. If they do, then the function adds the - * corresponding itag to the list of itag values that will be returned to the caller. - * If the string doesn't exist in the dictionary, then the function has a dictionary - * entry created and includes that itag in the return list. - * - * Providing a value of NULL for the list of string names will return a NULL list of - * itags. This corresponds to a wildcard case. The function will return the NULL list - * and provide an ORTE_SUCCESS response to the caller. - * - * Providing a name with a wildcard character in it will cause the function to look - * for all itag values that match the specified pattern. In this case, no new dictionary - * elements will be created - calls to get_itag_list that include wildcards and generate - * no itag values to be returned to the caller will result in an ORTE_ERR_NOT_FOUND - * response. - * - */ -int orte_gpr_replica_get_itag_list(orte_gpr_replica_itag_t **itaglist, - orte_gpr_replica_segment_t *seg, char **names, - orte_std_cntr_t *num_names); - -/** - * Typedef of the orte_gpr_replica_get_itag_list() function signature - * so that it can be invoked via a function pointer for a unit test. 
- */ -typedef int (*orte_gpr_replica_get_itag_list_fn_t) - (orte_gpr_replica_itag_t **itaglist, - orte_gpr_replica_segment_t *seg, char **names, - orte_std_cntr_t *num_names); - - -/* - * SEGMENT OPERATIONS - */ - -/** Find a requested registry segment. - * The function finds the registry segment corresponding to - * the specified name. - * - * @param *seg (OUT) Pointer to the segment - * @param create (IN) A boolean that indicates whether or not to create the segment if it - * doesn't already exist. TRUE => create it, FALSE => don't create it. - * @param segment (IN) Pointer to a string containing the name of the segment to be found. - * - * @retval ORTE_SUCCESS Operation successful - * @retval ORTE_ERROR(s) Appropriate error code returned - */ -int orte_gpr_replica_find_seg(orte_gpr_replica_segment_t **seg, - bool create, char *segment); - -/** - * Typedef of the orte_gpr_replica_find_seg() function signature so - * that it can be invoked via a function pointer for a unit test. - */ -typedef int (*orte_gpr_replica_find_seg_fn_t) - (orte_gpr_replica_segment_t **seg, - bool create, char *segment); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/grpcomm/Makefile.am b/orte/mca/grpcomm/Makefile.am index fb38b1575e..8bf7af9d78 100644 --- a/orte/mca/grpcomm/Makefile.am +++ b/orte/mca/grpcomm/Makefile.am @@ -24,7 +24,9 @@ libmca_grpcomm_la_SOURCES = nobase_orte_HEADERS = # local files -headers = grpcomm.h +headers = grpcomm.h \ + grpcomm_types.h + libmca_grpcomm_la_SOURCES += $(headers) # Conditionally install the header files diff --git a/orte/mca/grpcomm/base/base.h b/orte/mca/grpcomm/base/base.h index 10ce4602ee..6b233109ca 100644 --- a/orte/mca/grpcomm/base/base.h +++ b/orte/mca/grpcomm/base/base.h @@ -25,7 +25,6 @@ * includes */ #include "orte_config.h" -#include "orte/orte_constants.h" #include "opal/class/opal_list.h" #include "opal/mca/mca.h" @@ -36,10 +35,7 @@ /* * Global functions for MCA overall collective open and 
close */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - +BEGIN_C_DECLS /* * function definitions @@ -61,7 +57,5 @@ ORTE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_base_selected_com * external API functions will be documented in the mca/grpcomm/grpcomm.h file */ -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/grpcomm/base/grpcomm_base_open.c b/orte/mca/grpcomm/base/grpcomm_base_open.c index 0e8422109b..bec635ab09 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_open.c +++ b/orte/mca/grpcomm/base/grpcomm_base_open.c @@ -18,7 +18,7 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" @@ -53,17 +53,9 @@ orte_grpcomm_base_component_t mca_grpcomm_base_selected_component; */ int orte_grpcomm_base_open(void) { - int value; - - /* Debugging / verbose output */ - mca_base_param_reg_int_name("grpcomm", "base_verbose", - "Verbosity level for the grpcomm framework", - false, false, 0, &value); - if (value != 0) { - orte_grpcomm_base_output = opal_output_open(NULL); - } else { - orte_grpcomm_base_output = -1; - } + /* Debugging / verbose output. Always have stream open, with + verbose set by the mca open system... 
*/ + orte_grpcomm_base_output = opal_output_open(NULL); /* Open up all available components */ diff --git a/orte/mca/grpcomm/base/grpcomm_base_select.c b/orte/mca/grpcomm/base/grpcomm_base_select.c index dd4b6371cb..4fb69bc44b 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_select.c +++ b/orte/mca/grpcomm/base/grpcomm_base_select.c @@ -94,7 +94,7 @@ int orte_grpcomm_base_select(void) orte_grpcomm = *best_module; mca_grpcomm_base_selected_component = *best_component; mca_grpcomm_base_selected = true; - + /* all done */ return ORTE_SUCCESS; diff --git a/orte/mca/grpcomm/basic/configure.m4 b/orte/mca/grpcomm/basic/configure.m4 new file mode 100644 index 0000000000..e925e6b9b0 --- /dev/null +++ b/orte/mca/grpcomm/basic/configure.m4 @@ -0,0 +1,13 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Sandia National Laboratories. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_grpcomm_basic_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_grpcomm_basic_CONFIG], [$1]) diff --git a/orte/mca/grpcomm/basic/configure.params b/orte/mca/grpcomm/basic/configure.params index 3513f8d956..e9ed18fd2f 100644 --- a/orte/mca/grpcomm/basic/configure.params +++ b/orte/mca/grpcomm/basic/configure.params @@ -22,3 +22,9 @@ # Specific to this module PARAM_CONFIG_FILES="Makefile" +# +# Set the config priority so that this +# component will build for all environs -except- +# those special ones that do not support it + +PARAM_CONFIG_PRIORITY=10 diff --git a/orte/mca/grpcomm/basic/grpcomm_basic.h b/orte/mca/grpcomm/basic/grpcomm_basic.h index 5094380698..2a52614b48 100644 --- a/orte/mca/grpcomm/basic/grpcomm_basic.h +++ b/orte/mca/grpcomm/basic/grpcomm_basic.h @@ -21,8 +21,7 @@ #define GRPCOMM_BASIC_H #include "orte_config.h" -#include "orte/orte_types.h" -#include "orte/orte_constants.h" +#include "orte/types.h" #include "opal/threads/mutex.h" #include 
"opal/threads/condition.h" @@ -39,11 +38,14 @@ BEGIN_C_DECLS * globals needed within component */ typedef struct { - int xcast_linear_xover; - int xcast_binomial_xover; + orte_vpid_t xcast_linear_xover; + orte_vpid_t xcast_binomial_xover; orte_std_cntr_t num_active; opal_mutex_t mutex; opal_condition_t cond; + opal_hash_table_t modex_data; + opal_buffer_t modex_buffer; + orte_std_cntr_t modex_num_entries; } orte_grpcomm_basic_globals_t; extern orte_grpcomm_basic_globals_t orte_grpcomm_basic; diff --git a/orte/mca/grpcomm/basic/grpcomm_basic_component.c b/orte/mca/grpcomm/basic/grpcomm_basic_component.c index fb3db6e399..8cd7ce4b96 100644 --- a/orte/mca/grpcomm/basic/grpcomm_basic_component.c +++ b/orte/mca/grpcomm/basic/grpcomm_basic_component.c @@ -30,9 +30,7 @@ * includes */ #include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" +#include "orte/constants.h" #include "opal/threads/mutex.h" #include "opal/class/opal_list.h" @@ -47,11 +45,7 @@ #include "grpcomm_basic.h" -/* set the default xovers to always force linear - * this is a tmp workaround for a problem in the - * rml that prevents the daemons from sending - * messages to their local procs - */ +/* set the default xovers */ #define XCAST_LINEAR_XOVER_DEFAULT 2 #define XCAST_BINOMIAL_XOVER_DEFAULT 16 @@ -88,19 +82,17 @@ int orte_grpcomm_basic_open(void) { char *mode; mca_base_component_t *c = &mca_grpcomm_basic_component.grpcomm_version; - - /* initialize globals */ - OBJ_CONSTRUCT(&orte_grpcomm_basic.mutex, opal_mutex_t); - OBJ_CONSTRUCT(&orte_grpcomm_basic.cond, opal_condition_t); - orte_grpcomm_basic.num_active = 0; - + int tmp; + mca_base_param_reg_int(c, "xcast_linear_xover", "Number of daemons where use of linear xcast mode is to begin", - false, false, XCAST_LINEAR_XOVER_DEFAULT, &orte_grpcomm_basic.xcast_linear_xover); + false, false, XCAST_LINEAR_XOVER_DEFAULT, &tmp); + orte_grpcomm_basic.xcast_linear_xover = tmp; mca_base_param_reg_int(c, "xcast_binomial_xover", 
"Number of daemons where use of binomial xcast mode is to begin", - false, false, XCAST_BINOMIAL_XOVER_DEFAULT, &orte_grpcomm_basic.xcast_binomial_xover); + false, false, XCAST_BINOMIAL_XOVER_DEFAULT, &tmp); + orte_grpcomm_basic.xcast_binomial_xover = tmp; mca_base_param_reg_string(c, "xcast_mode", "Select xcast mode (\"linear\" | \"binomial\" | \"direct\")", @@ -124,14 +116,22 @@ int orte_grpcomm_basic_open(void) /* Close the component */ int orte_grpcomm_basic_close(void) { - OBJ_DESTRUCT(&orte_grpcomm_basic.mutex); - OBJ_DESTRUCT(&orte_grpcomm_basic.cond); - return ORTE_SUCCESS; } orte_grpcomm_base_module_t* orte_grpcomm_basic_init(int *priority) { + /* initialize globals */ + OBJ_CONSTRUCT(&orte_grpcomm_basic.mutex, opal_mutex_t); + OBJ_CONSTRUCT(&orte_grpcomm_basic.cond, opal_condition_t); + orte_grpcomm_basic.num_active = 0; + + OBJ_CONSTRUCT(&orte_grpcomm_basic.modex_data, opal_hash_table_t); + OBJ_CONSTRUCT(&orte_grpcomm_basic.modex_buffer, opal_buffer_t); + orte_grpcomm_basic.modex_num_entries = 0; + + opal_hash_table_init(&orte_grpcomm_basic.modex_data, 256); + /* we are the default, so set a low priority so we can be overridden */ *priority = 1; @@ -143,5 +143,12 @@ orte_grpcomm_base_module_t* orte_grpcomm_basic_init(int *priority) */ int orte_grpcomm_basic_finalize(void) { + OBJ_DESTRUCT(&orte_grpcomm_basic.mutex); + OBJ_DESTRUCT(&orte_grpcomm_basic.cond); + + opal_hash_table_remove_all(&orte_grpcomm_basic.modex_data); + OBJ_DESTRUCT(&orte_grpcomm_basic.modex_data); + + OBJ_DESTRUCT(&orte_grpcomm_basic.modex_buffer); return ORTE_SUCCESS; } diff --git a/orte/mca/grpcomm/basic/grpcomm_basic_module.c b/orte/mca/grpcomm/basic/grpcomm_basic_module.c index d85987b675..85e1faf355 100644 --- a/orte/mca/grpcomm/basic/grpcomm_basic_module.c +++ b/orte/mca/grpcomm/basic/grpcomm_basic_module.c @@ -18,7 +18,8 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" +#include "orte/types.h" #include #ifdef HAVE_SYS_TIME_H @@ 
-29,150 +30,42 @@ #include "opal/util/output.h" #include "opal/util/bit_ops.h" +#include "orte/class/orte_proc_table.h" #include "orte/util/proc_info.h" -#include "orte/dss/dss.h" -#include "orte/mca/gpr/gpr.h" +#include "opal/dss/dss.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/odls/odls_types.h" #include "orte/mca/rml/rml.h" -#include "orte/runtime/params.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/orted/orted.h" #include "orte/mca/grpcomm/base/base.h" #include "grpcomm_basic.h" + /* Local functions */ static int xcast_binomial_tree(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag); static int xcast_linear(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag); static int xcast_direct(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag); -/* define a callback function for use by the blocking version - * of xcast so we can "hold" the caller here until all non-blocking - * sends have completed - */ -static void xcast_send_cb(int status, - orte_process_name_t* peer, - orte_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); - - /* release the buffer */ - OBJ_RELEASE(buffer); - - orte_grpcomm_basic.num_active--; - if (orte_grpcomm_basic.num_active <= 0) { - orte_grpcomm_basic.num_active = 0; /* just to be safe */ - opal_condition_signal(&orte_grpcomm_basic.cond); - } - OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); - return; -} - /** * A "broadcast-like" function to a job's processes. 
* @param jobid The job whose processes are to receive the message * @param buffer The data to broadcast */ -/* Non-blocking version */ -static int xcast_nb(orte_jobid_t job, - orte_buffer_t *buffer, - orte_rml_tag_t tag) -{ - int rc = ORTE_SUCCESS; - struct timeval start, stop; - orte_vpid_t num_daemons; - - OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, - "%s xcast_nb sent to job %ld tag %ld", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)job, (long)tag)); - - /* if there is no message to send, then just return ok */ - if (NULL == buffer) { - return ORTE_SUCCESS; - } - - if (orte_timing) { - gettimeofday(&start, NULL); - } - - /* get the number of daemons currently in the system so we can - * select the "optimal" algorithm - */ - if (ORTE_SUCCESS != (rc = orte_ns.get_vpid_range(0, &num_daemons))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, - "%s xcast_nb: num_daemons %ld linear xover: %ld binomial xover: %ld", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)num_daemons, (long)orte_grpcomm_basic.xcast_linear_xover, - (long)orte_grpcomm_basic.xcast_binomial_xover)); - - if (num_daemons < 2 || orte_daemon_died) { - /* if there is only one daemon in the system, then we must - * use the direct mode - there is no other option. Note that - * since the HNP is the one that typically does xcast sends, - * only one daemon means that the HNP itself is sending to - * itself. This is required in singletons - where the - * singleton acts as the HNP - and as an HNP starts - * itself up - * - * NOTE: although we allow users to alter crossover points - * for selecting specific xcast modes, this required - * use-case behavior MUST always be retained or else - * singletons and HNP startup will fail! 
- * - * We also insist that the direct xcast mode be used when - * an orted has failed as we cannot rely on alternative - * methods to reach all orteds and/or procs - */ - rc = xcast_direct(job, buffer, tag); - goto DONE; - } - - /* now use the crossover points to select the proper transmission - * mode. We have built-in default crossover points for this - * decision tree, but the user is free to alter them as - * they wish via MCA params - */ - - if (num_daemons < orte_grpcomm_basic.xcast_linear_xover) { - rc = xcast_direct(job, buffer, tag); - } else if (num_daemons < orte_grpcomm_basic.xcast_binomial_xover) { - rc = xcast_linear(job, buffer, tag); - } else { - rc = xcast_binomial_tree(job, buffer, tag); - } - -DONE: - if (orte_timing) { - gettimeofday(&stop, NULL); - opal_output(0, "xcast_nb %s: time %ld usec", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long int)((stop.tv_sec - start.tv_sec)*1000000 + - (stop.tv_usec - start.tv_usec))); - } - - return rc; -} - /* Blocking version */ static int xcast(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag) { int rc = ORTE_SUCCESS; @@ -180,9 +73,9 @@ static int xcast(orte_jobid_t job, orte_vpid_t num_daemons; OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, - "%s xcast sent to job %ld tag %ld", + "%s xcast sent to job %s tag %ld", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)job, (long)tag)); + ORTE_JOBID_PRINT(job), (long)tag)); /* if there is no message to send, then just return ok */ if (NULL == buffer) { @@ -193,14 +86,24 @@ static int xcast(orte_jobid_t job, gettimeofday(&start, NULL); } - /* get the number of daemons currently in the system so we can - * select the "optimal" algorithm + /* application procs do not know how many daemons are currently in + * the system. If we tell them that number at startup, then it might + * well be inaccurate if any dynamic spawns have occurred. 
To avoid + * the problem, have all application procs solely use the binomial + * xcast so the first message just goes to the HNP who then propagates + * it from there */ - if (ORTE_SUCCESS != (rc = orte_ns.get_vpid_range(0, &num_daemons))) { - ORTE_ERROR_LOG(rc); - return rc; + if (!orte_process_info.hnp && !orte_process_info.daemon) { + rc = xcast_binomial_tree(job, buffer, tag); + goto DONE; } + /* if we are the HNP or a daemon, then the num_procs field in our + * process_info struct contains the active number of daemons in the + * system - use it to decide what xcast algo to use + */ + num_daemons = orte_process_info.num_procs; + OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, "%s xcast: num_daemons %ld linear xover: %ld binomial xover: %ld", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -211,9 +114,8 @@ static int xcast(orte_jobid_t job, /* if there is only one daemon in the system, then we must * use the direct mode - there is no other option. Note that * since the HNP is the one that typically does xcast sends, - * only one daemon means that the HNP itself is sending to - * itself. This is required in singletons - where the - * singleton acts as the HNP - and as an HNP starts + * only one daemon means that the HNP is sending to + * itself. 
This is required as an HNP starts * itself up * * NOTE: although we allow users to alter crossover points @@ -244,23 +146,10 @@ static int xcast(orte_jobid_t job, } DONE: - /* if a daemon has failed AND this message was going to - * the daemons, then we don't want to wait - just return - */ - if (0 == job && orte_daemon_died) { - return rc; - } - - /* now go to sleep until woken up */ - OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); - while (orte_grpcomm_basic.num_active > 0) { - opal_condition_wait(&orte_grpcomm_basic.cond, &orte_grpcomm_basic.mutex); - } - OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); if (orte_timing) { gettimeofday(&stop, NULL); - opal_output(0, "xcast %s: time %ld usec", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + opal_output(0, "%s xcast: time %ld usec", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long int)((stop.tv_sec - start.tv_sec)*1000000 + (stop.tv_usec - start.tv_usec))); } @@ -268,15 +157,13 @@ DONE: } static int xcast_binomial_tree(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag) { - orte_daemon_cmd_flag_t command, mode; + orte_daemon_cmd_flag_t command; + orte_grpcomm_mode_t mode; int rc; - orte_process_name_t target; - orte_buffer_t *buf; - orte_vpid_t nd; - orte_std_cntr_t num_daemons; + opal_buffer_t *buf; OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, "%s xcast_binomial", @@ -289,26 +176,18 @@ static int xcast_binomial_tree(orte_jobid_t job, /* need to pack the msg for sending - be sure to include routing info so it * can properly be sent through the daemons */ - buf = OBJ_NEW(orte_buffer_t); + buf = OBJ_NEW(opal_buffer_t); - /* tell the daemon the routing algorithm so it can figure - * out how to forward the message down the tree, if at all - */ - mode = ORTE_DAEMON_ROUTE_BINOMIAL; - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &mode, 1, ORTE_DAEMON_CMD))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* get the number of daemons currently in the system and tell the daemon so - * it can properly route 
- */ - if (ORTE_SUCCESS != (rc = orte_ns.get_vpid_range(0, &nd))) { + /* tell the daemon to process and relay */ + command = ORTE_DAEMON_PROCESS_AND_RELAY_CMD; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } - num_daemons = (orte_std_cntr_t)nd; - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &num_daemons, 1, ORTE_STD_CNTR))) { + + /* tell the daemon the routing algorithm this xmission is using */ + mode = ORTE_GRPCOMM_BINOMIAL; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &mode, 1, ORTE_GRPCOMM_MODE))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } @@ -324,15 +203,15 @@ static int xcast_binomial_tree(orte_jobid_t job, */ if (ORTE_RML_TAG_DAEMON != tag) { command = ORTE_DAEMON_MESSAGE_LOCAL_PROCS; - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &job, 1, ORTE_JOBID))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &job, 1, ORTE_JOBID))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &tag, 1, ORTE_RML_TAG))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_RML_TAG))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } @@ -342,7 +221,7 @@ static int xcast_binomial_tree(orte_jobid_t job, * caller is still responsible for releasing any memory in the buffer they * gave to us */ - if (ORTE_SUCCESS != (rc = orte_dss.copy_payload(buf, buffer))) { + if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(buf, buffer))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } @@ -352,41 +231,42 @@ static int xcast_binomial_tree(orte_jobid_t job, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)buf->bytes_used)); - /* all we need to do is send this to ourselves - our relay logic + /* all we need to do is send this to the HNP - the relay logic * will ensure everyone else gets it! 
*/ - target.jobid = 0; - target.vpid = 0; - ++orte_grpcomm_basic.num_active; - OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, - "xcast_binomial: num_active now %ld sending %s => %s", - (long)orte_grpcomm_basic.num_active, + OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, + "%s xcast_binomial: sending %s => %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&target))); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(ORTE_PROC_MY_HNP))); - if (0 > (rc = orte_rml.send_buffer_nb(&target, buf, ORTE_RML_TAG_ORTED_ROUTED, - 0, xcast_send_cb, NULL))) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - rc = ORTE_ERR_COMM_FAILURE; - OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); - --orte_grpcomm_basic.num_active; - OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); - goto CLEANUP; - } + /* if I am the HNP, then just call the cmd processor - don't send this to myself */ + if (orte_process_info.hnp) { + if (ORTE_SUCCESS != (rc = orte_daemon_cmd_processor(ORTE_PROC_MY_NAME, buf, ORTE_RML_TAG_DAEMON))) { + ORTE_ERROR_LOG(rc); + } + } else { + if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_DAEMON, 0))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + rc = ORTE_SUCCESS; + } CLEANUP: - /* the buffer will be released by the cb function */ + OBJ_RELEASE(buf); + return rc; } static int xcast_linear(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag) { int rc; - orte_buffer_t *buf; - orte_daemon_cmd_flag_t command, mode=ORTE_DAEMON_ROUTE_NONE; + opal_buffer_t *buf; + orte_daemon_cmd_flag_t command; orte_vpid_t i, range; orte_process_name_t dummy; @@ -400,14 +280,8 @@ static int xcast_linear(orte_jobid_t job, * daemon. 
This buffer will contain all the info needed by the * daemon, plus the payload intended for the processes themselves */ - buf = OBJ_NEW(orte_buffer_t); + buf = OBJ_NEW(opal_buffer_t); - /* tell the daemon that no further routing required */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &mode, 1, ORTE_DAEMON_CMD))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - /* if this isn't intended for the daemon command tag, then we better * tell the daemon to deliver it to the procs, and what job is supposed * to get it - this occurs when a caller just wants to send something @@ -419,15 +293,15 @@ static int xcast_linear(orte_jobid_t job, */ if (ORTE_RML_TAG_DAEMON != tag) { command = ORTE_DAEMON_MESSAGE_LOCAL_PROCS; - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &job, 1, ORTE_JOBID))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &job, 1, ORTE_JOBID))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } - if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &tag, 1, ORTE_RML_TAG))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_RML_TAG))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } @@ -437,7 +311,7 @@ static int xcast_linear(orte_jobid_t job, * caller is still responsible for releasing any memory in the buffer they * gave to us */ - if (ORTE_SUCCESS != (rc = orte_dss.copy_payload(buf, buffer))) { + if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(buf, buffer))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } @@ -448,53 +322,39 @@ static int xcast_linear(orte_jobid_t job, (long)buf->bytes_used)); /* get the number of daemons out there */ - orte_ns.get_vpid_range(0, &range); + range = orte_process_info.num_procs; /* we have to account for all of the messages we are about to send * because the non-blocking send can come back almost immediately - before * we would get the chance to increment the 
num_active. This causes us * to not correctly wakeup and reset the xcast_in_progress flag */ - OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); - orte_grpcomm_basic.num_active += range; - OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); - - OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, - "%s xcast_linear: num_active now %ld", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)orte_grpcomm_basic.num_active)); /* send the message to each daemon as fast as we can */ - dummy.jobid = 0; + dummy.jobid = ORTE_PROC_MY_HNP->jobid; for (i=0; i < range; i++) { dummy.vpid = i; OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, - "xcast_linear: %s => %s", + "%s xcast_linear: %s => %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&dummy))); - /* we have to retain the buffer here since we are going to - * use it multiple times - */ - OBJ_RETAIN(buf); - if (0 > (rc = orte_rml.send_buffer_nb(&dummy, buf, ORTE_RML_TAG_ORTED_ROUTED, - 0, xcast_send_cb, NULL))) { - if (ORTE_ERR_ADDRESSEE_UNKNOWN != rc) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - rc = ORTE_ERR_COMM_FAILURE; - OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); - orte_grpcomm_basic.num_active -= (range-i); - OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); + + /* if the target is the HNP and I am the HNP, then just call the cmd processor */ + if (0 == i && orte_process_info.hnp) { + if (ORTE_SUCCESS != (rc = orte_daemon_cmd_processor(ORTE_PROC_MY_NAME, buf, ORTE_RML_TAG_DAEMON))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + } else { + if (0 > (rc = orte_rml.send_buffer(&dummy, buf, ORTE_RML_TAG_DAEMON, 0))) { + ORTE_ERROR_LOG(rc); goto CLEANUP; } - /* decrement the number we are waiting to see */ - OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); - orte_grpcomm_basic.num_active--; - OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); } } rc = ORTE_SUCCESS; - - /* cleanup */ + CLEANUP: /* need to release the buffer so that the reference count will be correct * when the cb function releases it @@ 
-504,47 +364,25 @@ CLEANUP: } static int xcast_direct(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag) { - orte_std_cntr_t i; int rc; - orte_process_name_t *peers=NULL; - orte_std_cntr_t n; - opal_list_t attrs; - opal_list_item_t *item; - orte_buffer_t *buf; + orte_process_name_t peer; + orte_vpid_t i, vpid; + orte_job_t *jdata; OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, "%s xcast_direct", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* it seems unnecessary, but we need to protect the data because the cb function - * is going to release the buffer - so create a local buffer and copy the - * payload across - */ - buf = OBJ_NEW(orte_buffer_t); - /* copy the payload into the new buffer - this is non-destructive, so our - * caller is still responsible for releasing any memory in the buffer they - * gave to us - */ - if (ORTE_SUCCESS != (rc = orte_dss.copy_payload(buf, buffer))) { - ORTE_ERROR_LOG(rc); + /* need to get the job peers so we know who to send the message to */ + if (NULL == (jdata = orte_get_job_data_object(job))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rc = ORTE_ERR_NOT_FOUND; goto CLEANUP; } - - /* need to get the job peers so we know who to send the message to */ - OBJ_CONSTRUCT(&attrs, opal_list_t); - orte_rmgr.add_attribute(&attrs, ORTE_NS_USE_JOBID, ORTE_JOBID, &job, ORTE_RMGR_ATTR_OVERRIDE); - if (ORTE_SUCCESS != (rc = orte_ns.get_peers(&peers, &n, &attrs))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attrs); - OBJ_RELEASE(buf); - return rc; - } - item = opal_list_remove_first(&attrs); - OBJ_RELEASE(item); - OBJ_DESTRUCT(&attrs); + vpid = jdata->num_procs; OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, "%s xcast_direct: buffer size %ld", @@ -556,57 +394,44 @@ static int xcast_direct(orte_jobid_t job, * we would get the chance to increment the num_active. 
This causes us * to not correctly wakeup and reset the xcast_in_progress flag */ - OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); - orte_grpcomm_basic.num_active += n; - OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); - OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, - "%s xcast_direct: num_active now %ld", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)orte_grpcomm_basic.num_active)); - - for(i=0; i %s", + "%s xcast_direct: %s => %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peers+i))); - /* we have to retain the buffer here since we are going to - * use it multiple times - */ - OBJ_RETAIN(buf); - if (0 > (rc = orte_rml.send_buffer_nb(peers+i, buf, tag, 0, xcast_send_cb, NULL))) { - if (ORTE_ERR_ADDRESSEE_UNKNOWN != rc) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - rc = ORTE_ERR_COMM_FAILURE; - OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); - orte_grpcomm_basic.num_active -= (n-i); - OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&peer))); + + /* if the target is the HNP and I am the HNP, then just call the cmd processor */ + if (peer.jobid == ORTE_PROC_MY_NAME->jobid && + peer.vpid == ORTE_PROC_MY_NAME->vpid && + orte_process_info.hnp) { + if (ORTE_SUCCESS != (rc = orte_daemon_cmd_processor(ORTE_PROC_MY_NAME, buffer, tag))) { + ORTE_ERROR_LOG(rc); goto CLEANUP; } - /* decrement the number we are waiting to see */ - OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); - orte_grpcomm_basic.num_active--; - OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); - } + } else { + if (0 > (rc = orte_rml.send_buffer(&peer, buffer, tag, 0))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + } } rc = ORTE_SUCCESS; - + CLEANUP: - /* need to release the buffer so that the reference count will be correct - * when the cb function releases it - */ - OBJ_RELEASE(buf); - free(peers); - + /* nothing to cleanup here */ return rc; } -static int allgather(orte_buffer_t *sbuf, orte_buffer_t *rbuf) +static int allgather(opal_buffer_t *sbuf, 
opal_buffer_t *rbuf) { orte_process_name_t name; int rc; - orte_std_cntr_t i; - orte_buffer_t tmpbuf; + orte_vpid_t i; + opal_buffer_t tmpbuf; /* everything happens within my jobid */ name.jobid = ORTE_PROC_MY_NAME->jobid; @@ -633,13 +458,13 @@ static int allgather(orte_buffer_t *sbuf, orte_buffer_t *rbuf) } /* seed the outgoing buffer with the num_procs so it can be unpacked */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(rbuf, &orte_process_info.num_procs, 1, ORTE_STD_CNTR))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(rbuf, &orte_process_info.num_procs, 1, ORTE_STD_CNTR))) { ORTE_ERROR_LOG(rc); return rc; } /* put my own information into the outgoing buffer */ - if (ORTE_SUCCESS != (rc = orte_dss.copy_payload(rbuf, sbuf))) { + if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(rbuf, sbuf))) { ORTE_ERROR_LOG(rc); return rc; } @@ -650,8 +475,8 @@ static int allgather(orte_buffer_t *sbuf, orte_buffer_t *rbuf) /* rank=0 receives everyone else's data */ for (i=1; i < orte_process_info.num_procs; i++) { - name.vpid = (orte_vpid_t)i; - OBJ_CONSTRUCT(&tmpbuf, orte_buffer_t); + name.vpid = i; + OBJ_CONSTRUCT(&tmpbuf, opal_buffer_t); if (0 > orte_rml.recv_buffer(&name, &tmpbuf, ORTE_RML_TAG_ALLGATHER, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); return ORTE_ERR_COMM_FAILURE; @@ -660,7 +485,7 @@ static int allgather(orte_buffer_t *sbuf, orte_buffer_t *rbuf) "%s allgather buffer %ld received", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)i)); /* append this data to the rbuf */ - if (ORTE_SUCCESS != (rc = orte_dss.copy_payload(rbuf, &tmpbuf))) { + if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(rbuf, &tmpbuf))) { ORTE_ERROR_LOG(rc); return rc; } @@ -672,7 +497,7 @@ static int allgather(orte_buffer_t *sbuf, orte_buffer_t *rbuf) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* xcast the results */ - orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, rbuf, ORTE_RML_TAG_ALLGATHER); + xcast(ORTE_PROC_MY_NAME->jobid, rbuf, ORTE_RML_TAG_ALLGATHER); /* xcast automatically ensures that the sender -always- 
gets a copy * of the message. This is required to ensure proper operation of the @@ -680,7 +505,7 @@ static int allgather(orte_buffer_t *sbuf, orte_buffer_t *rbuf) * post our own receive here so that we don't leave a message rattling * around in our RML */ - OBJ_CONSTRUCT(&tmpbuf, orte_buffer_t); + OBJ_CONSTRUCT(&tmpbuf, opal_buffer_t); if (0 > (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &tmpbuf, ORTE_RML_TAG_ALLGATHER, 0))) { ORTE_ERROR_LOG(rc); return rc; @@ -694,12 +519,12 @@ static int allgather(orte_buffer_t *sbuf, orte_buffer_t *rbuf) return ORTE_SUCCESS; } -static int allgather_list(opal_list_t *names, orte_buffer_t *sbuf, orte_buffer_t *rbuf) +static int allgather_list(opal_list_t *names, opal_buffer_t *sbuf, opal_buffer_t *rbuf) { opal_list_item_t *item; orte_namelist_t *peer, *root; orte_std_cntr_t i, num_peers; - orte_buffer_t tmpbuf; + opal_buffer_t tmpbuf; int rc; /* the first entry on the list is the "root" that collects @@ -708,14 +533,14 @@ static int allgather_list(opal_list_t *names, orte_buffer_t *sbuf, orte_buffer_t */ root = (orte_namelist_t*)opal_list_get_first(names); - if (ORTE_EQUAL != orte_dss.compare(root->name, ORTE_PROC_MY_NAME, ORTE_NAME)) { + if (OPAL_EQUAL != opal_dss.compare(&root->name, ORTE_PROC_MY_NAME, ORTE_NAME)) { /* everyone but root sends data */ - if (0 > (rc = orte_rml.send_buffer(root->name, sbuf, ORTE_RML_TAG_ALLGATHER_LIST, 0))) { + if (0 > (rc = orte_rml.send_buffer(&root->name, sbuf, ORTE_RML_TAG_ALLGATHER_LIST, 0))) { ORTE_ERROR_LOG(rc); return rc; } /* now receive the final result */ - if (0 > (rc = orte_rml.recv_buffer(root->name, rbuf, ORTE_RML_TAG_ALLGATHER_LIST, 0))) { + if (0 > (rc = orte_rml.recv_buffer(&root->name, rbuf, ORTE_RML_TAG_ALLGATHER_LIST, 0))) { ORTE_ERROR_LOG(rc); return rc; } @@ -726,13 +551,13 @@ static int allgather_list(opal_list_t *names, orte_buffer_t *sbuf, orte_buffer_t num_peers = (orte_std_cntr_t)opal_list_get_size(names); /* seed the outgoing buffer with the num_procs so it can be 
unpacked */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(rbuf, &num_peers, 1, ORTE_STD_CNTR))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(rbuf, &num_peers, 1, ORTE_STD_CNTR))) { ORTE_ERROR_LOG(rc); return rc; } /* put my own information into the outgoing buffer */ - if (ORTE_SUCCESS != (rc = orte_dss.copy_payload(rbuf, sbuf))) { + if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(rbuf, sbuf))) { ORTE_ERROR_LOG(rc); return rc; } @@ -740,13 +565,13 @@ static int allgather_list(opal_list_t *names, orte_buffer_t *sbuf, orte_buffer_t /* root receives everyone else's data */ for (i=1; i < num_peers; i++) { /* receive the buffer from this process */ - OBJ_CONSTRUCT(&tmpbuf, orte_buffer_t); + OBJ_CONSTRUCT(&tmpbuf, opal_buffer_t); if (0 > orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &tmpbuf, ORTE_RML_TAG_ALLGATHER_LIST, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); return ORTE_ERR_COMM_FAILURE; } /* append this data to the rbuf */ - if (ORTE_SUCCESS != (rc = orte_dss.copy_payload(rbuf, &tmpbuf))) { + if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(rbuf, &tmpbuf))) { ORTE_ERROR_LOG(rc); return rc; } @@ -761,12 +586,12 @@ static int allgather_list(opal_list_t *names, orte_buffer_t *sbuf, orte_buffer_t peer = (orte_namelist_t*)item; /* skip myself */ - if (ORTE_EQUAL == orte_dss.compare(root->name, peer->name, ORTE_NAME)) { + if (OPAL_EQUAL == opal_dss.compare(&root->name, &peer->name, ORTE_NAME)) { continue; } /* transmit the buffer to this process */ - if (0 > orte_rml.send_buffer(peer->name, rbuf, ORTE_RML_TAG_ALLGATHER_LIST, 0)) { + if (0 > orte_rml.send_buffer(&peer->name, rbuf, ORTE_RML_TAG_ALLGATHER_LIST, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); return ORTE_ERR_COMM_FAILURE; } @@ -779,8 +604,8 @@ static int allgather_list(opal_list_t *names, orte_buffer_t *sbuf, orte_buffer_t static int barrier(void) { orte_process_name_t name; - orte_std_cntr_t i; - orte_buffer_t buf; + orte_vpid_t i; + opal_buffer_t buf; int rc; /* everything happens within the same jobid */ @@ -789,10 
+614,14 @@ static int barrier(void) /* All non-root send & receive zero-length message. */ if (0 != ORTE_PROC_MY_NAME->vpid) { name.vpid = 0; - OBJ_CONSTRUCT(&buf, orte_buffer_t); + OBJ_CONSTRUCT(&buf, opal_buffer_t); i=0; - orte_dss.pack(&buf, &i, 1, ORTE_STD_CNTR); /* put something meaningless here */ + opal_dss.pack(&buf, &i, 1, ORTE_STD_CNTR); /* put something meaningless here */ + OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, + "%s sending barrier", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + rc = orte_rml.send_buffer(&name,&buf,ORTE_RML_TAG_BARRIER,0); if (rc < 0) { ORTE_ERROR_LOG(rc); @@ -801,19 +630,24 @@ static int barrier(void) OBJ_DESTRUCT(&buf); /* get the release from rank=0 */ - OBJ_CONSTRUCT(&buf, orte_buffer_t); + OBJ_CONSTRUCT(&buf, opal_buffer_t); rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD,&buf,ORTE_RML_TAG_BARRIER,0); if (rc < 0) { ORTE_ERROR_LOG(rc); return rc; } OBJ_DESTRUCT(&buf); + + OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, + "%s received barrier release", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + return ORTE_SUCCESS; } for (i = 1; i < orte_process_info.num_procs; i++) { name.vpid = (orte_vpid_t)i; - OBJ_CONSTRUCT(&buf, orte_buffer_t); + OBJ_CONSTRUCT(&buf, opal_buffer_t); OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, "%s barrier %ld received", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)i)); @@ -830,8 +664,8 @@ static int barrier(void) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* xcast the release */ - OBJ_CONSTRUCT(&buf, orte_buffer_t); - orte_dss.pack(&buf, &i, 1, ORTE_STD_CNTR); /* put something meaningless here */ + OBJ_CONSTRUCT(&buf, opal_buffer_t); + opal_dss.pack(&buf, &i, 1, ORTE_STD_CNTR); /* put something meaningless here */ orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, &buf, ORTE_RML_TAG_BARRIER); OBJ_DESTRUCT(&buf); @@ -841,7 +675,7 @@ static int barrier(void) * post our own receive here so that we don't leave a message rattling * around in our RML */ - OBJ_CONSTRUCT(&buf, orte_buffer_t); + OBJ_CONSTRUCT(&buf, 
opal_buffer_t); if (0 > (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, ORTE_RML_TAG_BARRIER, 0))) { ORTE_ERROR_LOG(rc); return rc; @@ -854,11 +688,536 @@ static int barrier(void) return ORTE_SUCCESS; } +static int chain_recips(opal_list_t *names) +{ + orte_namelist_t *target; + + /* chain just sends to the next vpid up the line */ + if (ORTE_PROC_MY_NAME->vpid < orte_process_info.num_procs-1) { + /* I am not at the end of the chain */ + if (NULL == (target = OBJ_NEW(orte_namelist_t))) { + return ORTE_ERR_OUT_OF_RESOURCE; + } + target->name.jobid = ORTE_PROC_MY_NAME->jobid; + target->name.vpid = ORTE_PROC_MY_NAME->vpid + 1; + opal_list_append(names, &target->item); + } + return ORTE_SUCCESS; +} + +static int binomial_recips(opal_list_t *names) +{ + int i, bitmap, peer, size, rank, hibit, mask; + orte_namelist_t *target; + + /* compute the bitmap */ + bitmap = opal_cube_dim((int)orte_process_info.num_procs); + rank = (int)ORTE_PROC_MY_NAME->vpid; + size = (int)orte_process_info.num_procs; + + hibit = opal_hibit(rank, bitmap); + --bitmap; + + for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) { + peer = rank | mask; + if (peer < size) { + if (NULL == (target = OBJ_NEW(orte_namelist_t))) { + return ORTE_ERR_OUT_OF_RESOURCE; + } + target->name.jobid = ORTE_PROC_MY_NAME->jobid; + target->name.vpid = (orte_vpid_t)peer; + opal_list_append(names, &target->item); + } + } + return ORTE_SUCCESS; +} + +static int next_recips(opal_list_t *names, orte_grpcomm_mode_t mode) +{ + int rc; + + /* check the mode to select the proper algo */ + switch (mode) { + case ORTE_GRPCOMM_CHAIN: + rc = chain_recips(names); + break; + case ORTE_GRPCOMM_BINOMIAL: + rc = binomial_recips(names); + break; + default: + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rc = ORTE_ERR_NOT_FOUND; + break; + } + return rc; +} + + +/*************** MODEX SECTION **************/ + +/** + * MODEX DESIGN + * + * Modex data is always associated with a given orte process name, in + * an opal hash table. 
The hash table is necessary because modex data is + * received for entire jobids and when working with + * dynamic processes, it is possible we will receive data for a + * process not yet in the ompi_proc_all() list of processes. This + * information must be kept for later use, because if accept/connect + * causes the proc to be added to the ompi_proc_all() list, it could + * cause a connection storm. Therefore, we use an + * orte_proc_table backing store to contain all modex information. + * + * While we could add the now discovered proc into the ompi_proc_all() + * list, this has some problems, in that we don't have the + * architecture and hostname information needed to properly fill in + * the ompi_proc_t structure and we don't want to cause RML + * communication to get it when we don't really need to know anything + * about the remote proc. + * + * All data put into the modex (or received from the modex) is + * associated with a given proc,attr_name pair. The data structures + * to maintain this data look something like: + * + * opal_hash_table_t modex_data -> list of attr_proc_t objects + * + * +-----------------------------+ + * | modex_proc_data_t | + * | - opal_list_item_t | + * +-----------------------------+ + * | opal_mutex_t modex_lock | + * | bool modex_received_data | 1 + * | opal_list_t modules | ---------+ + * +-----------------------------+ | + * * | + * +--------------------------------+ <--------+ + * | modex_module_data_t | + * | - opal_list_item_t | + * +--------------------------------+ + * | mca_base_component_t component | + * | void *module_data | + * | size_t module_data_size | + * +--------------------------------+ + * + */ + + +/** + * Modex data for a particular orte process + * + * Locking infrastructure and list of module data for a given orte + * process name. The name association is maintained in the + * modex_data hash table. 
+ */ +struct modex_proc_data_t { + /** Structure can be put on lists (including in hash tables) */ + opal_list_item_t super; + /* Lock held whenever the modex data for this proc is being + modified */ + opal_mutex_t modex_lock; + /* True if modex data has ever been received from this process, + false otherwise. */ + bool modex_received_data; + /* List of modex_module_data_t structures containing all data + received from this process, sorted by component name. */ + opal_list_t modex_module_data; +}; +typedef struct modex_proc_data_t modex_proc_data_t; + +static void +modex_construct(modex_proc_data_t * modex) +{ + OBJ_CONSTRUCT(&modex->modex_lock, opal_mutex_t); + modex->modex_received_data = false; + OBJ_CONSTRUCT(&modex->modex_module_data, opal_list_t); +} + +static void +modex_destruct(modex_proc_data_t * modex) +{ + OBJ_DESTRUCT(&modex->modex_module_data); + OBJ_DESTRUCT(&modex->modex_lock); +} + +OBJ_CLASS_INSTANCE(modex_proc_data_t, opal_object_t, + modex_construct, modex_destruct); + + + +/** + * Data for a particular attribute + * + * Container for data for a particular module,attribute pair. This + * structure should be contained in the modex_module_data list in an + * modex_proc_data_t structure to maintain an association with a + * given proc. The list is then searched for a matching attribute + * name. + * + * While searching the list or reading from (or writing to) this + * structure, the lock in the proc_data_t should be held. 
+ */ +struct modex_attr_data_t { + /** Structure can be put on lists */ + opal_list_item_t super; + /** Attribute name */ + char * attr_name; + /** Binary blob of data associated with this proc,component pair */ + void *attr_data; + /** Size (in bytes) of module_data */ + size_t attr_data_size; +}; +typedef struct modex_attr_data_t modex_attr_data_t; + +static void +modex_attr_construct(modex_attr_data_t * module) +{ + module->attr_name = NULL; + module->attr_data = NULL; + module->attr_data_size = 0; +} + +static void +modex_attr_destruct(modex_attr_data_t * module) +{ + if (NULL != module->attr_name) { + free(module->attr_name); + } + if (NULL != module->attr_data) { + free(module->attr_data); + } +} + +OBJ_CLASS_INSTANCE(modex_attr_data_t, + opal_list_item_t, + modex_attr_construct, + modex_attr_destruct); + + +/** + * Find data for a given attribute in a given modex_proc_data_t + * container. + * + * The proc_data's modex_lock must be held during this + * search. + */ +static modex_attr_data_t * +modex_lookup_attr_data(modex_proc_data_t *proc_data, + const char *attr_name, + bool create_if_not_found) +{ + modex_attr_data_t *attr_data = NULL; + for (attr_data = (modex_attr_data_t *) opal_list_get_first(&proc_data->modex_module_data); + attr_data != (modex_attr_data_t *) opal_list_get_end(&proc_data->modex_module_data); + attr_data = (modex_attr_data_t *) opal_list_get_next(attr_data)) { + if (0 == strcmp(attr_name, attr_data->attr_name)) { + return attr_data; + } + } + + if (create_if_not_found) { + attr_data = OBJ_NEW(modex_attr_data_t); + if (NULL == attr_data) return NULL; + + attr_data->attr_name = strdup(attr_name); + opal_list_append(&proc_data->modex_module_data, &attr_data->super); + + return attr_data; + } + + return NULL; +} + + +/** +* Find modex_proc_data_t container associated with given + * orte_process_name_t. + * + * The global lock should *NOT* be held when + * calling this function. 
+ */ +static modex_proc_data_t* +modex_lookup_orte_proc(const orte_process_name_t *orte_proc) +{ + modex_proc_data_t *proc_data; + + OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); + proc_data = (modex_proc_data_t*) + orte_hash_table_get_proc(&orte_grpcomm_basic.modex_data, orte_proc); + if (NULL == proc_data) { + /* The proc clearly exists, so create a modex structure + for it */ + proc_data = OBJ_NEW(modex_proc_data_t); + if (NULL == proc_data) { + opal_output(0, "grpcomm_basic_modex_lookup_orte_proc: unable to allocate modex_proc_data_t\n"); + OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); + return NULL; + } + orte_hash_table_set_proc(&orte_grpcomm_basic.modex_data, orte_proc, proc_data); + } + OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); + + return proc_data; +} + + +static int set_proc_attr(const char *attr_name, + const void *data, + size_t size) +{ + int rc; + + OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); + + /* Pack the attribute name information into the local buffer */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&orte_grpcomm_basic.modex_buffer, &attr_name, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* pack the size */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&orte_grpcomm_basic.modex_buffer, &size, 1, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* Pack the actual data into the buffer */ + if (0 != size) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(&orte_grpcomm_basic.modex_buffer, (void *) data, size, OPAL_BYTE))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + } + + /* track the number of entries */ + ++orte_grpcomm_basic.modex_num_entries; + +cleanup: + OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); + + return rc; +} + +static int get_proc_attr(const orte_process_name_t proc, + const char * attribute_name, void **val, + size_t *size) +{ + modex_proc_data_t *proc_data; + modex_attr_data_t *attr_data; + + proc_data = modex_lookup_orte_proc(&proc); + if (NULL == proc_data) return ORTE_ERR_NOT_FOUND; + + 
OPAL_THREAD_LOCK(&proc_data->modex_lock); + + /* look up attribute */ + attr_data = modex_lookup_attr_data(proc_data, attribute_name, false); + + /* copy the data out to the user */ + if ((NULL == attr_data) || + (attr_data->attr_data_size == 0)) { + opal_output(0, "grpcomm_basic_get_proc_attr: no attr avail or zero byte size"); + *val = NULL; + *size = 0; + } else { + void *copy = malloc(attr_data->attr_data_size); + + if (copy == NULL) { + OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + return ORTE_ERR_OUT_OF_RESOURCE; + } + memcpy(copy, attr_data->attr_data, attr_data->attr_data_size); + *val = copy; + *size = attr_data->attr_data_size; + } + OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + + return ORTE_SUCCESS; +} + + +static int modex(opal_list_t *procs) +{ + opal_buffer_t buf, rbuf; + orte_std_cntr_t i, j, num_procs, num_entries; + void *bytes = NULL; + orte_std_cntr_t cnt; + orte_process_name_t proc_name; + modex_proc_data_t *proc_data; + modex_attr_data_t *attr_data; + int rc; + + /* setup the buffer that will actually be sent */ + OBJ_CONSTRUCT(&buf, opal_buffer_t); + OBJ_CONSTRUCT(&rbuf, opal_buffer_t); + + /* put our process name in the buffer so it can be unpacked later */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* put the number of entries into the buffer */ + OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_grpcomm_basic.modex_num_entries, 1, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); + goto cleanup; + } + + /* if there are entries, non-destructively copy the data across */ + if (0 < orte_grpcomm_basic.modex_num_entries) { + if (ORTE_SUCCESS != (opal_dss.copy_payload(&buf, &orte_grpcomm_basic.modex_buffer))) { + ORTE_ERROR_LOG(rc); + OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); + goto cleanup; + } + } + OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); + + /* exchange the 
buffer with the list of peers (if provided) or all my peers */ + if (NULL == procs) { + if (ORTE_SUCCESS != (rc = allgather(&buf, &rbuf))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + } else { + if (ORTE_SUCCESS != (rc = allgather_list(procs, &buf, &rbuf))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + } + + /* process the results */ + /* extract the number of procs that put data in the buffer */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &num_procs, &cnt, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* process the buffer */ + for (i=0; i < num_procs; i++) { + /* unpack the process name */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &proc_name, &cnt, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* look up the modex data structure */ + proc_data = modex_lookup_orte_proc(&proc_name); + if (proc_data == NULL) { + /* report the error */ + opal_output(0, "grpcomm_basic_modex: received modex info for unknown proc %s\n", + ORTE_NAME_PRINT(&proc_name)); + rc = ORTE_ERR_NOT_FOUND; + goto cleanup; + } + + /* unpack the number of entries for this proc */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &num_entries, &cnt, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + OPAL_THREAD_LOCK(&proc_data->modex_lock); + + /* + * Extract the attribute names and values + */ + for (j = 0; j < num_entries; j++) { + size_t num_bytes; + char *attr_name; + + cnt = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &attr_name, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + goto cleanup; + } + + cnt = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &num_bytes, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + goto cleanup; + } + if (num_bytes != 0) { + if (NULL == (bytes = malloc(num_bytes))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + rc = ORTE_ERR_OUT_OF_RESOURCE; + 
OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + goto cleanup; + } + cnt = (orte_std_cntr_t) num_bytes; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, bytes, &cnt, OPAL_BYTE))) { + ORTE_ERROR_LOG(rc); + OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + goto cleanup; + } + num_bytes = cnt; + } else { + bytes = NULL; + } + + /* + * Lookup the corresponding modex structure + */ + if (NULL == (attr_data = modex_lookup_attr_data(proc_data, + attr_name, true))) { + opal_output(0, "grpcomm_basic_modex: modex_lookup_attr_data failed\n"); + OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + rc = ORTE_ERR_NOT_FOUND; + goto cleanup; + } + if (NULL != attr_data->attr_data) { + /* some pre-existing value must be here - release it */ + free(attr_data->attr_data); + } + attr_data->attr_data = bytes; + attr_data->attr_data_size = num_bytes; + proc_data->modex_received_data = true; + } + OPAL_THREAD_UNLOCK(&proc_data->modex_lock); + } + +cleanup: + OBJ_DESTRUCT(&buf); + OBJ_DESTRUCT(&rbuf); + + return rc; +} + +static int purge_proc_attrs(void) +{ + /* + * Purge the attributes + */ + opal_hash_table_remove_all(&orte_grpcomm_basic.modex_data); + OBJ_DESTRUCT(&orte_grpcomm_basic.modex_data); + OBJ_CONSTRUCT(&orte_grpcomm_basic.modex_data, opal_hash_table_t); + opal_hash_table_init(&orte_grpcomm_basic.modex_data, 256); + + /* + * Clear the modex buffer + */ + OBJ_DESTRUCT(&orte_grpcomm_basic.modex_buffer); + OBJ_CONSTRUCT(&orte_grpcomm_basic.modex_buffer, opal_buffer_t); + orte_grpcomm_basic.modex_num_entries = 0; + + return ORTE_SUCCESS; +} + orte_grpcomm_base_module_t orte_grpcomm_basic_module = { xcast, - xcast_nb, allgather, allgather_list, - barrier + barrier, + next_recips, + set_proc_attr, + get_proc_attr, + modex, + purge_proc_attrs }; diff --git a/orte/mca/grpcomm/cnos/configure.params b/orte/mca/grpcomm/cnos/configure.params index 3513f8d956..4e4404321c 100644 --- a/orte/mca/grpcomm/cnos/configure.params +++ b/orte/mca/grpcomm/cnos/configure.params @@ -19,6 +19,10 @@ # $HEADER$ # -# 
Specific to this module - PARAM_CONFIG_FILES="Makefile" + +# +# Set the config priority so that, if we can build, +# only CNOS component(s) will build + +PARAM_CONFIG_PRIORITY=30 diff --git a/orte/mca/grpcomm/cnos/grpcomm_cnos.h b/orte/mca/grpcomm/cnos/grpcomm_cnos.h index 2353eafab7..4ab4ee9d36 100644 --- a/orte/mca/grpcomm/cnos/grpcomm_cnos.h +++ b/orte/mca/grpcomm/cnos/grpcomm_cnos.h @@ -21,8 +21,8 @@ #define GRPCOMM_CNOS_H #include "orte_config.h" -#include "orte/orte_types.h" -#include "orte/orte_constants.h" +#include "orte/types.h" +#include "orte/constants.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" diff --git a/orte/mca/grpcomm/cnos/grpcomm_cnos_component.c b/orte/mca/grpcomm/cnos/grpcomm_cnos_component.c index d2e4163e33..e7812f8320 100644 --- a/orte/mca/grpcomm/cnos/grpcomm_cnos_component.c +++ b/orte/mca/grpcomm/cnos/grpcomm_cnos_component.c @@ -30,21 +30,12 @@ * includes */ #include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/threads/mutex.h" -#include "opal/class/opal_list.h" -#include "opal/util/output.h" +#include "orte/constants.h" +#include "orte/types.h" #include "opal/mca/mca.h" #include "opal/mca/base/mca_base_param.h" -#include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - #include "grpcomm_cnos.h" diff --git a/orte/mca/grpcomm/cnos/grpcomm_cnos_module.c b/orte/mca/grpcomm/cnos/grpcomm_cnos_module.c index f591f95adf..034c588a62 100644 --- a/orte/mca/grpcomm/cnos/grpcomm_cnos_module.c +++ b/orte/mca/grpcomm/cnos/grpcomm_cnos_module.c @@ -18,15 +18,15 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" +#include "orte/types.h" #include #ifdef HAVE_SYS_TIME_H #include #endif /* HAVE_SYS_TIME_H */ -#include "orte/dss/dss.h" -#include "orte/mca/ns/ns_types.h" +#include "opal/dss/dss.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml_types.h" @@ -37,26 +37,40 @@ 
#endif /* API functions */ -static int xcast_nb(orte_jobid_t job, - orte_buffer_t *buffer, - orte_rml_tag_t tag); - static int xcast(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag); static int orte_grpcomm_cnos_barrier(void); -static int allgather(orte_buffer_t *sbuf, orte_buffer_t *rbuf); +static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf); -static int allgather_list(opal_list_t *names, orte_buffer_t *sbuf, orte_buffer_t *rbuf); +static int allgather_list(opal_list_t *names, opal_buffer_t *sbuf, opal_buffer_t *rbuf); + +static int next_recips(opal_list_t *names, orte_grpcomm_mode_t mode); + +static int set_proc_attr(const char *attr_name, + const void *data, + size_t size); + +static int get_proc_attr(const orte_process_name_t proc, + const char * attribute_name, void **val, + size_t *size); + +static int modex(opal_list_t *procs); + +static int purge_proc_attrs(void); orte_grpcomm_base_module_t orte_grpcomm_cnos_module = { xcast, - xcast_nb, allgather, allgather_list, - orte_grpcomm_cnos_barrier + orte_grpcomm_cnos_barrier, + next_recips, + set_proc_attr, + get_proc_attr, + modex, + purge_proc_attrs }; @@ -66,17 +80,9 @@ orte_grpcomm_base_module_t orte_grpcomm_cnos_module = { * @param buffer The data to broadcast */ -/* Non-blocking version */ -static int xcast_nb(orte_jobid_t job, - orte_buffer_t *buffer, - orte_rml_tag_t tag) -{ - return ORTE_SUCCESS; -} - /* Blocking version */ static int xcast(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag) { return ORTE_SUCCESS; @@ -92,28 +98,59 @@ orte_grpcomm_cnos_barrier(void) return ORTE_SUCCESS; } -static int allgather(orte_buffer_t *sbuf, orte_buffer_t *rbuf) +static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf) { int rc; orte_std_cntr_t zero=0; /* seed the outgoing buffer with num_procs=0 so it won't be unpacked */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(rbuf, &zero, 1, ORTE_STD_CNTR))) { + if (ORTE_SUCCESS != (rc = 
opal_dss.pack(rbuf, &zero, 1, ORTE_STD_CNTR))) { ORTE_ERROR_LOG(rc); return rc; } return rc; } -static int allgather_list(opal_list_t *names, orte_buffer_t *sbuf, orte_buffer_t *rbuf) +static int allgather_list(opal_list_t *names, opal_buffer_t *sbuf, opal_buffer_t *rbuf) { int rc; orte_std_cntr_t zero=0; /* seed the outgoing buffer with num_procs=0 so it won't be unpacked */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(rbuf, &zero, 1, ORTE_STD_CNTR))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(rbuf, &zero, 1, ORTE_STD_CNTR))) { ORTE_ERROR_LOG(rc); return rc; } return rc; } + +static int next_recips(opal_list_t *names, orte_grpcomm_mode_t mode) +{ + /* nothing to do here */ + return ORTE_SUCCESS; +} + +static int set_proc_attr(const char *attr_name, + const void *data, + size_t size) +{ + return ORTE_SUCCESS; +} + +static int get_proc_attr(const orte_process_name_t proc, + const char * attribute_name, void **val, + size_t *size) +{ + return ORTE_ERR_NOT_IMPLEMENTED; +} + +static int modex(opal_list_t *procs) +{ + return ORTE_SUCCESS; +} + +static int purge_proc_attrs(void) +{ + return ORTE_SUCCESS; +} + diff --git a/orte/mca/grpcomm/configure.m4 b/orte/mca/grpcomm/configure.m4 new file mode 100644 index 0000000000..b74cdc8068 --- /dev/null +++ b/orte/mca/grpcomm/configure.m4 @@ -0,0 +1,13 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2007 Los Alamos National Security, LLC. +dnl All rights reserved. 
+dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +dnl we only want same priority level components +m4_define(MCA_grpcomm_CONFIGURE_MODE, STOP_AT_FIRST_PRIORITY) diff --git a/orte/mca/grpcomm/grpcomm.h b/orte/mca/grpcomm/grpcomm.h index 2da9e1794c..011dd60d7e 100644 --- a/orte/mca/grpcomm/grpcomm.h +++ b/orte/mca/grpcomm/grpcomm.h @@ -34,21 +34,19 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" +#include "orte/constants.h" +#include "orte/types.h" #include "opal/mca/mca.h" #include "opal/class/opal_list.h" -#include "orte/dss/dss_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/ns/ns_types.h" +#include "opal/dss/dss_types.h" #include "orte/mca/rml/rml_types.h" +#include "orte/mca/odls/odls_types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +#include "orte/mca/grpcomm/grpcomm_types.h" +BEGIN_C_DECLS /* * Component functions - all MUST be provided! @@ -56,40 +54,60 @@ extern "C" { /* Send a message to all members of a job - blocking */ typedef int (*orte_grpcomm_base_module_xcast_fn_t)(orte_jobid_t job, - orte_buffer_t *buffer, + opal_buffer_t *buffer, orte_rml_tag_t tag); -/* Send a message to all members of a job - non-blocking */ -typedef int (*orte_grpcomm_base_module_xcast_nb_fn_t)(orte_jobid_t job, - orte_buffer_t *buffer, - orte_rml_tag_t tag); - /* allgather - gather data from all procs */ -typedef int (*orte_grpcomm_base_module_allgather_fn_t)(orte_buffer_t *sbuf, orte_buffer_t *rbuf); +typedef int (*orte_grpcomm_base_module_allgather_fn_t)(opal_buffer_t *sbuf, opal_buffer_t *rbuf); typedef int (*orte_grpcomm_base_module_allgather_list_fn_t)(opal_list_t *names, - orte_buffer_t *sbuf, orte_buffer_t *rbuf); + opal_buffer_t *sbuf, opal_buffer_t *rbuf); /* barrier function */ typedef int (*orte_grpcomm_base_module_barrier_fn_t)(void); +/* for collectives, return next recipients in the chain */ +typedef int 
(*orte_gprcomm_base_next_recipients_fn_t)(opal_list_t *list, orte_grpcomm_mode_t mode); + +/** DATA EXCHANGE FUNCTIONS - SEE ompi/runtime/ompi_module_exchange.h FOR A DESCRIPTION + * OF HOW THIS ALL WORKS + */ + +/* send an attribute buffer */ +typedef int (*orte_grpcomm_base_modex_set_proc_attr_fn_t)(const char* attr_name, + const void *buffer, size_t size); + +/* get an attribute buffer */ +typedef int (*orte_grpcomm_base_modex_get_proc_attr_fn_t)(const orte_process_name_t name, + const char* attr_name, + void **buffer, size_t *size); + +/* perform a modex operation */ +typedef int (*orte_grpcomm_base_modex_fn_t)(opal_list_t *procs); + +/* purge the internal attr table */ +typedef int (*orte_grpcomm_base_purge_proc_attrs_fn_t)(void); + + /* * Ver 2.0 */ struct orte_grpcomm_base_module_2_0_0_t { orte_grpcomm_base_module_xcast_fn_t xcast; - orte_grpcomm_base_module_xcast_nb_fn_t xcast_nb; orte_grpcomm_base_module_allgather_fn_t allgather; orte_grpcomm_base_module_allgather_list_fn_t allgather_list; orte_grpcomm_base_module_barrier_fn_t barrier; + orte_gprcomm_base_next_recipients_fn_t next_recipients; + /* modex support functions */ + orte_grpcomm_base_modex_set_proc_attr_fn_t set_proc_attr; + orte_grpcomm_base_modex_get_proc_attr_fn_t get_proc_attr; + orte_grpcomm_base_modex_fn_t modex; + orte_grpcomm_base_purge_proc_attrs_fn_t purge_proc_attrs; }; typedef struct orte_grpcomm_base_module_2_0_0_t orte_grpcomm_base_module_2_0_0_t; typedef orte_grpcomm_base_module_2_0_0_t orte_grpcomm_base_module_t; -/* - * NS Component - */ /** * Initialize the selected component. 
*/ @@ -130,8 +148,6 @@ typedef orte_grpcomm_base_component_2_0_0_t orte_grpcomm_base_component_t; */ ORTE_DECLSPEC extern orte_grpcomm_base_module_t orte_grpcomm; /* holds selected module's function pointers */ -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/grpcomm/grpcomm_types.h b/orte/mca/grpcomm/grpcomm_types.h new file mode 100644 index 0000000000..f2852b160e --- /dev/null +++ b/orte/mca/grpcomm/grpcomm_types.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + * The OpenRTE Group Communications + * + * The OpenRTE Group Comm framework provides communication services that + * span entire jobs or collections of processes. It is not intended to be + * used for point-to-point communications (the RML does that), nor should + * it be viewed as a high-performance communication channel for large-scale + * data transfers. 
+ */ + +#ifndef MCA_GRPCOMM_TYPES_H +#define MCA_GRPCOMM_TYPES_H + +/* + * includes + */ + +#include "orte_config.h" + +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +BEGIN_C_DECLS + +/* + * Define routing modes + */ +typedef uint8_t orte_grpcomm_mode_t; +#define ORTE_GRPCOMM_MODE_T OPAL_UINT8 + +/* daemon N relays message to daemon N+1 */ +#define ORTE_GRPCOMM_CHAIN (orte_grpcomm_mode_t) 1 +/* binomial tree */ +#define ORTE_GRPCOMM_BINOMIAL (orte_grpcomm_mode_t) 2 + + +END_C_DECLS + +#endif diff --git a/orte/mca/iof/base/base.h b/orte/mca/iof/base/base.h index c126918305..d47e877f2b 100644 --- a/orte/mca/iof/base/base.h +++ b/orte/mca/iof/base/base.h @@ -43,10 +43,7 @@ #include "opal/mca/mca.h" #include "orte/mca/iof/iof.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - +BEGIN_C_DECLS struct orte_iof_base_t { int iof_output; @@ -58,7 +55,7 @@ struct orte_iof_base_t { size_t iof_waiting; opal_free_list_t iof_fragments; size_t iof_window_size; - orte_process_name_t* iof_service; + orte_process_name_t iof_service; }; typedef struct orte_iof_base_t orte_iof_base_t; @@ -71,7 +68,6 @@ ORTE_DECLSPEC int orte_iof_base_flush(void); ORTE_DECLSPEC extern orte_iof_base_t orte_iof_base; -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* MCA_PML_H */ +END_C_DECLS + +#endif /* MCA_IOF_BASE_H */ diff --git a/orte/mca/iof/base/iof_base_close.c b/orte/mca/iof/base/iof_base_close.c index 5db57bca6a..756e68722d 100644 --- a/orte/mca/iof/base/iof_base_close.c +++ b/orte/mca/iof/base/iof_base_close.c @@ -17,6 +17,7 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include @@ -57,10 +58,6 @@ int orte_iof_base_close(void) OBJ_RELEASE(item); } - if (NULL != orte_iof_base.iof_service) { - free(orte_iof_base.iof_service); - } - OBJ_DESTRUCT(&orte_iof_base.iof_components_opened); OBJ_DESTRUCT(&orte_iof_base.iof_endpoints); OBJ_DESTRUCT(&orte_iof_base.iof_lock); diff --git a/orte/mca/iof/base/iof_base_endpoint.c 
b/orte/mca/iof/base/iof_base_endpoint.c index 3ac20db462..f7c6ff1f94 100644 --- a/orte/mca/iof/base/iof_base_endpoint.c +++ b/orte/mca/iof/base/iof_base_endpoint.c @@ -45,8 +45,9 @@ #endif /* HAVE_SIGNAL_H */ #include "opal/util/output.h" -#include "orte/mca/ns/ns.h" #include "orte/mca/rml/rml.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/iof/base/iof_base_endpoint.h" @@ -227,7 +228,7 @@ static void orte_iof_base_endpoint_read_handler(int fd, short flags, void *cbdat /* start non-blocking RML call to forward received data */ opal_output(orte_iof_base.iof_output, "iof_base_endpoint read handler: sending data to svc"); rc = orte_rml.send_nb( - orte_iof_base.iof_service, + &orte_iof_base.iof_service, frag->frag_iov, 2, ORTE_RML_TAG_IOF_SVC, @@ -348,7 +349,7 @@ static orte_iof_base_endpoint_t* orte_iof_base_endpoint_lookup( item != opal_list_get_end(&orte_iof_base.iof_endpoints); item = opal_list_get_next(item)) { orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)item; - if (ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL,proc,&endpoint->ep_origin) && + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,proc,&endpoint->ep_origin) && endpoint->ep_tag == tag && endpoint->ep_mode == mode) { OBJ_RETAIN(endpoint); return endpoint; @@ -494,7 +495,7 @@ int orte_iof_base_endpoint_delete( while(item != opal_list_get_end(&orte_iof_base.iof_endpoints)) { opal_list_item_t* next = opal_list_get_next(item); orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)item; - if (ORTE_EQUAL == orte_ns.compare_fields(mask,proc,&endpoint->ep_origin)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(mask,proc,&endpoint->ep_origin)) { if (endpoint->ep_tag == tag || ORTE_IOF_ANY == endpoint->ep_tag || ORTE_IOF_ANY == tag) { @@ -568,7 +569,7 @@ orte_iof_base_endpoint_t* orte_iof_base_endpoint_match( item != opal_list_get_end(&orte_iof_base.iof_endpoints); item = 
opal_list_get_next(item)) { orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)item; - if(ORTE_EQUAL == orte_ns.compare_fields(target_mask,target_name,&endpoint->ep_origin)) { + if(OPAL_EQUAL == orte_util_compare_name_fields(target_mask,target_name,&endpoint->ep_origin)) { if(endpoint->ep_tag == target_tag || endpoint->ep_tag == ORTE_IOF_ANY || target_tag == ORTE_IOF_ANY) { diff --git a/orte/mca/iof/base/iof_base_open.c b/orte/mca/iof/base/iof_base_open.c index 1473023235..8bacf3328e 100644 --- a/orte/mca/iof/base/iof_base_open.c +++ b/orte/mca/iof/base/iof_base_open.c @@ -18,16 +18,22 @@ #include "orte_config.h" +#include "orte/constants.h" + #include #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" +#include "opal/util/output.h" + +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "orte/mca/iof/iof.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/iof/base/iof_base_header.h" #include "orte/mca/iof/base/iof_base_fragment.h" -#include "opal/util/output.h" /* * The following file was created by configure. 
It contains extern @@ -53,7 +59,7 @@ int orte_iof_base_open(void) { int id; int int_value; - char* str_value; + char *str_value; /* Initialize globals */ OBJ_CONSTRUCT(&orte_iof_base.iof_components_opened, opal_list_t); @@ -69,9 +75,14 @@ int orte_iof_base_open(void) mca_base_param_lookup_int(id,&int_value); orte_iof_base.iof_window_size = int_value; - id = mca_base_param_register_string("iof","base","service",NULL,"0.0.0"); + /* someone might pass in an iof_service name, so do a little + * dance to setup the default + */ + orte_util_convert_process_name_to_string(&str_value, ORTE_PROC_MY_HNP); + id = mca_base_param_register_string("iof","base","service",NULL,str_value); + free(str_value); mca_base_param_lookup_string(id,&str_value); - orte_ns.convert_string_to_process_name(&orte_iof_base.iof_service, str_value); + orte_util_convert_string_to_process_name(&orte_iof_base.iof_service, str_value); free(str_value); /* Debugging / verbose output */ diff --git a/orte/mca/iof/base/iof_base_select.c b/orte/mca/iof/base/iof_base_select.c index d273dcc71f..f9ff2e0e17 100644 --- a/orte/mca/iof/base/iof_base_select.c +++ b/orte/mca/iof/base/iof_base_select.c @@ -17,6 +17,7 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include "opal/util/output.h" #include "opal/mca/mca.h" diff --git a/orte/mca/iof/base/iof_base_setup.c b/orte/mca/iof/base/iof_base_setup.c index 4850c55d2a..7602b10695 100644 --- a/orte/mca/iof/base/iof_base_setup.c +++ b/orte/mca/iof/base/iof_base_setup.c @@ -23,6 +23,7 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include #ifdef HAVE_UNISTD_H @@ -53,14 +54,13 @@ #include #endif -#include "orte/mca/iof/base/iof_base_setup.h" - -#include "orte/orte_constants.h" #include "opal/util/output.h" #include "opal/util/opal_pty.h" + #include "orte/mca/errmgr/errmgr.h" + #include "orte/mca/iof/iof.h" -#include "orte/mca/ns/ns.h" +#include "orte/mca/iof/base/iof_base_setup.h" int orte_iof_base_setup_prefork(orte_iof_base_io_conf_t *opts) diff 
--git a/orte/mca/iof/base/iof_base_setup.h b/orte/mca/iof/base/iof_base_setup.h index 7403ef0d58..83872dea20 100644 --- a/orte/mca/iof/base/iof_base_setup.h +++ b/orte/mca/iof/base/iof_base_setup.h @@ -20,7 +20,8 @@ #ifndef IOF_BASE_SETUP_H_ #define IOF_BASE_SETUP_H_ -#include "orte/mca/ns/ns.h" +#include "orte_config.h" +#include "orte/types.h" struct orte_iof_base_io_conf_t { int usepty; diff --git a/orte/mca/iof/iof.h b/orte/mca/iof/iof.h index 612903d3de..161243529d 100644 --- a/orte/mca/iof/iof.h +++ b/orte/mca/iof/iof.h @@ -142,13 +142,16 @@ #define ORTE_IOF_H #include "orte_config.h" +#include "orte/types.h" + #include "opal/class/opal_list.h" #include "opal/mca/mca.h" -#include "orte/mca/ns/ns.h" #include "opal/mca/crs/crs.h" #include "opal/mca/crs/base/base.h" +#include "orte/util/name_fns.h" + BEGIN_C_DECLS /* Predefined tag values */ diff --git a/orte/mca/iof/null/iof_null_module.c b/orte/mca/iof/null/iof_null_module.c index 09f9861d1a..0cfecd350e 100644 --- a/orte/mca/iof/null/iof_null_module.c +++ b/orte/mca/iof/null/iof_null_module.c @@ -18,13 +18,14 @@ */ #include "orte_config.h" +#include "orte/constants.h" + #include #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ #include -#include "orte/orte_constants.h" #include "opal/util/output.h" #include "orte/mca/iof/iof.h" #include "orte/mca/rml/rml.h" diff --git a/orte/mca/iof/proxy/iof_proxy.c b/orte/mca/iof/proxy/iof_proxy.c index abcc078da3..279f7baf37 100644 --- a/orte/mca/iof/proxy/iof_proxy.c +++ b/orte/mca/iof/proxy/iof_proxy.c @@ -18,6 +18,7 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include #ifdef HAVE_UNISTD_H @@ -27,15 +28,19 @@ #include #endif /* HAVE_STRING_H */ -#include "orte/orte_constants.h" #include "opal/util/output.h" + #include "orte/mca/iof/iof.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" + #include "orte/mca/iof/iof.h" 
#include "orte/mca/iof/base/base.h" #include "orte/mca/iof/base/iof_base_endpoint.h" -#include "orte/mca/errmgr/errmgr.h" + #include "iof_proxy.h" #include "iof_proxy_svc.h" @@ -84,7 +89,7 @@ int orte_iof_proxy_publish( if (orte_iof_base.iof_output >= 0) { char* name_str; - orte_ns.get_proc_name_string(&name_str, origin); + orte_util_convert_process_name_to_string(&name_str, origin); opal_output(orte_iof_base.iof_output, "orte_iof_proxy_publish(%s,%d,%d,%d)\n", name_str, mode, tag, fd); diff --git a/orte/mca/iof/proxy/iof_proxy_component.c b/orte/mca/iof/proxy/iof_proxy_component.c index c0a3fc319a..f2cf982fd0 100644 --- a/orte/mca/iof/proxy/iof_proxy_component.c +++ b/orte/mca/iof/proxy/iof_proxy_component.c @@ -18,12 +18,16 @@ */ #include "orte_config.h" -#include "orte/util/proc_info.h" + #include "opal/util/output.h" #include "opal/runtime/opal_progress.h" -#include "orte/mca/rml/rml.h" #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/rml/rml.h" +#include "orte/util/proc_info.h" +#include "orte/runtime/orte_globals.h" + #include "orte/mca/iof/base/base.h" #include "orte/mca/iof/base/iof_base_endpoint.h" #include "iof_proxy.h" @@ -89,7 +93,7 @@ static orte_iof_base_module_t* orte_iof_proxy_init(int* priority, bool *allow_multi_user_threads, bool *have_hidden_threads) { int rc; - if(orte_process_info.seed == true) + if(orte_process_info.hnp == true) return NULL; *priority = 1; diff --git a/orte/mca/iof/proxy/iof_proxy_svc.c b/orte/mca/iof/proxy/iof_proxy_svc.c index 8155fbbd6a..5a5a217680 100644 --- a/orte/mca/iof/proxy/iof_proxy_svc.c +++ b/orte/mca/iof/proxy/iof_proxy_svc.c @@ -18,9 +18,12 @@ */ #include "orte_config.h" + #include "opal/util/output.h" + #include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" +#include "orte/runtime/orte_globals.h" + #include "orte/mca/iof/base/base.h" #include "orte/mca/iof/base/iof_base_header.h" #include "orte/mca/iof/base/iof_base_endpoint.h" @@ -67,7 +70,7 @@ int 
orte_iof_proxy_svc_publish( iov.iov_len = sizeof(hdr); rc = orte_rml.send( - orte_iof_base.iof_service, + &orte_iof_base.iof_service, &iov, 1, ORTE_RML_TAG_IOF_SVC, @@ -105,7 +108,7 @@ int orte_iof_proxy_svc_unpublish( iov.iov_len = sizeof(hdr); rc = orte_rml.send( - orte_iof_base.iof_service, + &orte_iof_base.iof_service, &iov, 1, ORTE_RML_TAG_IOF_SVC, @@ -149,7 +152,7 @@ int orte_iof_proxy_svc_subscribe( iov.iov_len = sizeof(hdr); rc = orte_rml.send( - orte_iof_base.iof_service, + &orte_iof_base.iof_service, &iov, 1, ORTE_RML_TAG_IOF_SVC, @@ -194,7 +197,7 @@ int orte_iof_proxy_svc_unsubscribe( iov.iov_len = sizeof(hdr); rc = orte_rml.send( - orte_iof_base.iof_service, + &orte_iof_base.iof_service, &iov, 1, ORTE_RML_TAG_IOF_SVC, diff --git a/orte/mca/iof/proxy/iof_proxy_svc.h b/orte/mca/iof/proxy/iof_proxy_svc.h index 0e129222a4..f05dd883ac 100644 --- a/orte/mca/iof/proxy/iof_proxy_svc.h +++ b/orte/mca/iof/proxy/iof_proxy_svc.h @@ -21,12 +21,14 @@ #define MCA_IOF_PROXY_SVC_H #include "orte_config.h" -#include "orte/mca/iof/iof.h" -#include "orte/mca/ns/ns.h" +#include "orte/types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +#include "orte/mca/rml/rml_types.h" +#include "orte/util/name_fns.h" + +#include "orte/mca/iof/iof.h" + +BEGIN_C_DECLS /* * Send requests to the svc component @@ -74,8 +76,7 @@ void orte_iof_proxy_svc_recv( void* cbdata); -#if defined(c_plusplus) || defined(__cplusplus) -}; -#endif +END_C_DECLS + #endif diff --git a/orte/mca/iof/svc/iof_svc.c b/orte/mca/iof/svc/iof_svc.c index e61c829105..3c1a434194 100644 --- a/orte/mca/iof/svc/iof_svc.c +++ b/orte/mca/iof/svc/iof_svc.c @@ -18,6 +18,8 @@ * $HEADER$ */ #include "orte_config.h" +#include "orte/constants.h" + #include #ifdef HAVE_UNISTD_H #include @@ -26,9 +28,11 @@ #include #endif /* HAVE_STRING_H */ -#include "orte/orte_constants.h" #include "opal/util/output.h" + #include "orte/mca/oob/base/base.h" +#include "orte/runtime/orte_globals.h" + #include 
"orte/mca/iof/base/base.h" #include "orte/mca/iof/base/iof_base_endpoint.h" #include "iof_svc.h" diff --git a/orte/mca/iof/svc/iof_svc_component.c b/orte/mca/iof/svc/iof_svc_component.c index 2084092bfa..ea0593ed2a 100644 --- a/orte/mca/iof/svc/iof_svc_component.c +++ b/orte/mca/iof/svc/iof_svc_component.c @@ -18,12 +18,15 @@ */ #include "orte_config.h" -#include "orte/util/proc_info.h" + #include "opal/util/output.h" #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" + +#include "orte/util/proc_info.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" +#include "orte/runtime/orte_globals.h" + #include "iof_svc.h" #include "iof_svc_proxy.h" #include "iof_svc_pub.h" @@ -147,7 +150,7 @@ orte_iof_svc_init(int* priority, bool *allow_multi_user_threads, bool *have_hidd { int rc; - if (false == orte_process_info.seed) { + if (false == orte_process_info.hnp) { return NULL; } diff --git a/orte/mca/iof/svc/iof_svc_proxy.c b/orte/mca/iof/svc/iof_svc_proxy.c index afae1df672..7abfe6bbd1 100644 --- a/orte/mca/iof/svc/iof_svc_proxy.c +++ b/orte/mca/iof/svc/iof_svc_proxy.c @@ -17,14 +17,19 @@ * $HEADER$ */ #include "orte_config.h" +#include "orte/types.h" +#include "orte/constants.h" + #include "opal/util/output.h" + +#include "orte/class/orte_proc_table.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/util/name_fns.h" + #include "orte/mca/iof/base/iof_base_header.h" #include "orte/mca/iof/base/iof_base_endpoint.h" #include "orte/mca/iof/base/iof_base_fragment.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/class/orte_proc_table.h" #include "iof_svc.h" #include "iof_svc_proxy.h" #include "iof_svc_pub.h" @@ -173,7 +178,7 @@ static void orte_iof_svc_proxy_msg( /* if the subscription origin doesn't match the message's origin, skip this subscription */ - if (ORTE_EQUAL == orte_ns.compare_fields(sub->origin_mask,&sub->origin_name,&hdr->msg_origin)) { + if 
(OPAL_EQUAL == orte_util_compare_name_fields(sub->origin_mask,&sub->origin_name,&hdr->msg_origin)) { opal_output(orte_iof_base.iof_output, "sub MATCH: origin %s, msg origin %s, msg proxy %s orte_iof_svc_proxy_msg: tag %d sequence %d, len %d", ORTE_NAME_PRINT(&sub->origin_name), ORTE_NAME_PRINT(&hdr->msg_origin), diff --git a/orte/mca/iof/svc/iof_svc_proxy.h b/orte/mca/iof/svc/iof_svc_proxy.h index eb10f5ec9a..c35b4d2b0b 100644 --- a/orte/mca/iof/svc/iof_svc_proxy.h +++ b/orte/mca/iof/svc/iof_svc_proxy.h @@ -19,15 +19,15 @@ #ifndef ORTE_IOF_SVC_PROXY_H #define ORTE_IOF_SVC_PROXY_H -#include "orte/mca/iof/iof.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - #include "orte_config.h" +#include "orte/types.h" + +#include "orte/mca/rml/rml_types.h" + #include "orte/mca/iof/iof.h" #include "orte/mca/iof/base/iof_base_header.h" +BEGIN_C_DECLS /** * Callback function from RML on receipt of IOF request. @@ -48,8 +48,7 @@ void orte_iof_svc_proxy_recv( orte_rml_tag_t tag, void* cbdata); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS + #endif diff --git a/orte/mca/iof/svc/iof_svc_pub.c b/orte/mca/iof/svc/iof_svc_pub.c index 09ff2aabbc..9ba5d07905 100644 --- a/orte/mca/iof/svc/iof_svc_pub.c +++ b/orte/mca/iof/svc/iof_svc_pub.c @@ -1,5 +1,29 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + #include "orte_config.h" +#include "orte/types.h" +#include "orte/constants.h" + #include "opal/util/output.h" + +#include "orte/util/name_fns.h" + #include "orte/mca/iof/base/iof_base_header.h" #include "iof_svc.h" #include "iof_svc_proxy.h" @@ -36,8 +60,8 @@ int orte_iof_svc_pub_create( item != opal_list_get_end(&mca_iof_svc_component.svc_published); item = opal_list_get_next(item)) { pub = (orte_iof_svc_pub_t*)item; - if(ORTE_EQUAL == orte_ns.compare_fields(pub_mask,pub_name,&pub->pub_name) && - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL,pub_proxy,&pub->pub_proxy) && + if(OPAL_EQUAL == orte_util_compare_name_fields(pub_mask,pub_name,&pub->pub_name) && + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,pub_proxy,&pub->pub_proxy) && pub_tag == pub->pub_tag) { OPAL_THREAD_UNLOCK(&mca_iof_svc_component.svc_lock); return ORTE_SUCCESS; @@ -89,8 +113,8 @@ orte_iof_svc_pub_t* orte_iof_svc_pub_lookup( item != opal_list_get_end(&mca_iof_svc_component.svc_published); item = opal_list_get_next(item)) { orte_iof_svc_pub_t* pub = (orte_iof_svc_pub_t*)item; - if (ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &pub->pub_name,pub_name) && - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &pub->pub_proxy,pub_proxy) && + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &pub->pub_name,pub_name) && + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &pub->pub_proxy,pub_proxy) && pub->pub_mask == pub_mask && pub->pub_tag == pub_tag) { return pub; @@ -150,8 +174,8 @@ void orte_iof_svc_pub_delete_all( opal_list_item_t* p_next = opal_list_get_next(p_item); orte_iof_svc_pub_t* pub = (orte_iof_svc_pub_t*)p_item; - if (ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &pub->pub_name,name) || - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &pub->pub_proxy,name)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, 
&pub->pub_name,name) || + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &pub->pub_proxy,name)) { opal_list_item_t* s_item; for(s_item = opal_list_get_first(&mca_iof_svc_component.svc_subscribed); diff --git a/orte/mca/iof/svc/iof_svc_pub.h b/orte/mca/iof/svc/iof_svc_pub.h index c2659416a1..78a65e08a2 100644 --- a/orte/mca/iof/svc/iof_svc_pub.h +++ b/orte/mca/iof/svc/iof_svc_pub.h @@ -20,14 +20,16 @@ #define ORTE_IOF_SVC_PUBLISH_H #include "orte_config.h" +#include "orte/types.h" + +#include "orte/util/name_fns.h" + #include "orte/mca/iof/iof.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/iof/base/iof_base_endpoint.h" #include "iof_svc.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /** * Endpoints that are sinks of data are published by the @@ -87,9 +89,7 @@ int orte_iof_svc_pub_delete( void orte_iof_svc_pub_delete_all( const orte_process_name_t* name); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/iof/svc/iof_svc_sub.c b/orte/mca/iof/svc/iof_svc_sub.c index 1a486ead77..b9ae451f93 100644 --- a/orte/mca/iof/svc/iof_svc_sub.c +++ b/orte/mca/iof/svc/iof_svc_sub.c @@ -21,11 +21,14 @@ #include #include "opal/util/output.h" + +#include "orte/class/orte_proc_table.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/runtime/orte_globals.h" + #include "orte/mca/iof/base/iof_base_header.h" #include "orte/mca/iof/base/iof_base_fragment.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/class/orte_proc_table.h" #include "iof_svc.h" #include "iof_svc_proxy.h" #include "iof_svc_pub.h" @@ -85,10 +88,10 @@ int orte_iof_svc_sub_create( item = opal_list_get_next(item)) { sub = (orte_iof_svc_sub_t*)item; if (sub->origin_mask == origin_mask && - ORTE_EQUAL == orte_ns.compare_fields(sub->origin_mask,&sub->origin_name,origin_name) && + OPAL_EQUAL == orte_util_compare_name_fields(sub->origin_mask,&sub->origin_name,origin_name) 
&& sub->origin_tag == origin_tag && sub->target_mask == target_mask && - ORTE_EQUAL == orte_ns.compare_fields(sub->target_mask,&sub->target_name,target_name) && + OPAL_EQUAL == orte_util_compare_name_fields(sub->target_mask,&sub->target_name,target_name) && sub->target_tag == target_tag) { OPAL_THREAD_UNLOCK(&mca_iof_svc_component.svc_lock); return ORTE_SUCCESS; @@ -197,9 +200,9 @@ void orte_iof_svc_sub_ack( /* If the subscription origin/tag doesn't match the ACK origin/tag, skip it */ - if (ORTE_EQUAL != orte_ns.compare_fields(sub->origin_mask, - &sub->origin_name, - &hdr->msg_origin) || + if (OPAL_EQUAL != orte_util_compare_name_fields(sub->origin_mask, + &sub->origin_name, + &hdr->msg_origin) || sub->origin_tag != hdr->msg_tag) { continue; } @@ -221,9 +224,9 @@ void orte_iof_svc_sub_ack( coming from this process, then update the seq_min calculation */ if (NULL != sub->sub_endpoint && - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, - orte_process_info.my_name, - peer)) { + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + ORTE_PROC_MY_NAME, + peer)) { if (do_close) { /* JMS what to do here? Need to set sub->sub_endpoint to NULL. Have similar leak for do_close for @@ -257,8 +260,8 @@ void orte_iof_svc_sub_ack( /* If the publication origin or publication proxy matches the ACK'ing proxy, save the ACK'ed byte count for this *origin* (not the proxy). 
*/ - if (ORTE_EQUAL == orte_ns.compare_fields(pub->pub_mask,&pub->pub_name,peer) || - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL,&pub->pub_proxy,peer)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(pub->pub_mask,&pub->pub_name,peer) || + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,&pub->pub_proxy,peer)) { opal_output(orte_iof_base.iof_output, "ack: found matching pub"); /* If we're closing, then remove this proc from @@ -313,12 +316,12 @@ void orte_iof_svc_sub_ack( if (seq_min == hdr->msg_seq+hdr->msg_len) { /* If the original message was initiated from this process, then the ACK delivery is local. */ - if (ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, - orte_process_info.my_name, - &hdr->msg_origin) || - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, - orte_process_info.my_name, - &hdr->msg_proxy)) { + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + ORTE_PROC_MY_NAME, + &hdr->msg_origin) || + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + ORTE_PROC_MY_NAME, + &hdr->msg_proxy)) { orte_iof_base_endpoint_t* endpoint; endpoint = orte_iof_base_endpoint_match(&hdr->msg_origin, ORTE_NS_CMP_ALL, @@ -383,10 +386,10 @@ int orte_iof_svc_sub_delete( opal_list_item_t* next = opal_list_get_next(item); orte_iof_svc_sub_t* sub = (orte_iof_svc_sub_t*)item; if (sub->origin_mask == origin_mask && - ORTE_EQUAL == orte_ns.compare_fields(sub->origin_mask,&sub->origin_name,origin_name) && + OPAL_EQUAL == orte_util_compare_name_fields(sub->origin_mask,&sub->origin_name,origin_name) && sub->origin_tag == origin_tag && sub->target_mask == target_mask && - ORTE_EQUAL == orte_ns.compare_fields(sub->target_mask,&sub->target_name,target_name) && + OPAL_EQUAL == orte_util_compare_name_fields(sub->target_mask,&sub->target_name,target_name) && sub->target_tag == target_tag) { opal_list_remove_item(&mca_iof_svc_component.svc_subscribed, item); OBJ_RELEASE(item); @@ -408,9 +411,9 @@ int orte_iof_svc_sub_delete_all( 
opal_list_item_t* next = opal_list_get_next(item); orte_iof_svc_sub_t* sub = (orte_iof_svc_sub_t*)item; if ((sub->origin_mask == ORTE_NS_CMP_ALL && - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL,&sub->origin_name,name)) || + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,&sub->origin_name,name)) || (sub->target_mask == ORTE_NS_CMP_ALL && - ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL,&sub->target_name,name))) { + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,&sub->target_name,name))) { opal_list_remove_item(&mca_iof_svc_component.svc_subscribed, item); OBJ_RELEASE(item); } @@ -536,7 +539,7 @@ bool orte_iof_svc_fwd_match( orte_iof_svc_sub_t* sub, orte_iof_svc_pub_t* pub) { - if (ORTE_EQUAL == orte_ns.compare_fields(sub->target_mask,&sub->target_name,&pub->pub_name) && + if (OPAL_EQUAL == orte_util_compare_name_fields(sub->target_mask,&sub->target_name,&pub->pub_name) && sub->origin_tag == pub->pub_tag) { return true; } else { diff --git a/orte/mca/iof/svc/iof_svc_sub.h b/orte/mca/iof/svc/iof_svc_sub.h index 416baea6b4..1be6458ad6 100644 --- a/orte/mca/iof/svc/iof_svc_sub.h +++ b/orte/mca/iof/svc/iof_svc_sub.h @@ -25,11 +25,14 @@ * endpoints. */ +#include "orte_config.h" +#include "orte/types.h" + #include "opal/class/opal_hash_table.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +#include "iof_svc_pub.h" + +BEGIN_C_DECLS struct orte_iof_svc_fwd_t { opal_list_item_t super; @@ -137,9 +140,7 @@ int orte_iof_svc_fwd_delete( orte_iof_svc_sub_t* sub, orte_iof_svc_pub_t* pub); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/ns/base/Makefile.am b/orte/mca/ns/base/Makefile.am deleted file mode 100644 index dc6b5b2d81..0000000000 --- a/orte/mca/ns/base/Makefile.am +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. 
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h \ - base/ns_private.h - -libmca_ns_la_SOURCES += \ - base/ns_base_close.c \ - base/ns_base_select.c \ - base/ns_base_open.c \ - base/ns_base_node_fns.c \ - base/ns_base_job_fns.c \ - base/ns_base_vpid_name_fns.c \ - base/ns_base_general_fns.c \ - base/ns_base_print_name_args.c \ - base/ns_base_diag_fns.c \ - base/data_type_support/ns_data_type_compare_fns.c \ - base/data_type_support/ns_data_type_copy_fns.c \ - base/data_type_support/ns_data_type_print_fns.c \ - base/data_type_support/ns_data_type_release_fns.c \ - base/data_type_support/ns_data_type_size_fns.c \ - base/data_type_support/ns_data_type_packing_fns.c \ - base/data_type_support/ns_data_type_unpacking_fns.c diff --git a/orte/mca/ns/base/base.h b/orte/mca/ns/base/base.h deleted file mode 100644 index 39591e766b..0000000000 --- a/orte/mca/ns/base/base.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_NS_BASE_H -#define MCA_NS_BASE_H - -/* - * includes - */ -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/mca.h" - -#include "orte/dss/dss_types.h" - -#include "orte/mca/ns/ns.h" - - -/* - * Global functions for MCA overall collective open and close - */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - -/* - * function definitions - */ -ORTE_DECLSPEC int orte_ns_base_open(void); -ORTE_DECLSPEC int orte_ns_base_select(void); -ORTE_DECLSPEC int orte_ns_base_close(void); -ORTE_DECLSPEC int orte_ns_base_init_print_args(void); - -/* - * globals that might be needed - */ - -ORTE_DECLSPEC extern int mca_ns_base_output; -ORTE_DECLSPEC extern bool mca_ns_base_selected; -ORTE_DECLSPEC extern opal_list_t mca_ns_base_components_available; -ORTE_DECLSPEC extern mca_ns_base_component_t mca_ns_base_selected_component; - -/* - * external API functions will be documented in the mca/ns/ns.h file - */ - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c deleted file mode 100755 index a55b1490c5..0000000000 --- a/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/ns_private.h" - -/* - * NUMERIC COMPARE FUNCTIONS - */ -int orte_ns_base_compare_name(orte_process_name_t *value1, - orte_process_name_t *value2, - orte_data_type_t type) -{ - if (NULL == value1 && NULL == value2) { - return ORTE_EQUAL; - } else if (NULL == value1) { - return ORTE_VALUE2_GREATER; - } else if (NULL == value2) { - return ORTE_VALUE1_GREATER; - } - - /* If any of the fields are wildcard, - * then we want to just ignore that one field. In the case - * of ORTE_NAME_WILDCARD (where ALL of the fields are wildcard), this - * will automatically result in ORTE_EQUAL for any name in the other - * value - a totally useless result, but consistent in behavior. - */ - - /** check the jobids - if one of them is WILDCARD, then ignore - * this field since anything is okay - */ - if (value1->jobid != ORTE_JOBID_WILDCARD && - value2->jobid != ORTE_JOBID_WILDCARD) { - if (value1->jobid < value2->jobid) { - return ORTE_VALUE2_GREATER; - } else if (value1->jobid > value2->jobid) { - return ORTE_VALUE1_GREATER; - } - } - - /** check the vpids - if one of them is WILDCARD, then ignore - * this field since anything is okay - */ - if (value1->vpid != ORTE_VPID_WILDCARD && - value2->vpid != ORTE_VPID_WILDCARD) { - if (value1->vpid < value2->vpid) { - return ORTE_VALUE2_GREATER; - } else if (value1->vpid > value2->vpid) { - return ORTE_VALUE1_GREATER; - } - } - - /** only way to get here is if all fields are equal or WILDCARD */ - return ORTE_EQUAL; -} - -int orte_ns_base_compare_vpid(orte_vpid_t *value1, - orte_vpid_t *value2, - orte_data_type_t type) - -{ - /** if either value is WILDCARD, then return equal */ - if (*value1 == ORTE_VPID_WILDCARD || - *value2 == ORTE_VPID_WILDCARD) return 
ORTE_EQUAL; - - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_ns_base_compare_jobid(orte_jobid_t *value1, - orte_jobid_t *value2, - orte_data_type_t type) -{ - /** if either value is WILDCARD, then return equal */ - if (*value1 == ORTE_JOBID_WILDCARD || - *value2 == ORTE_JOBID_WILDCARD) return ORTE_EQUAL; - - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - -int orte_ns_base_compare_nodeid(orte_nodeid_t *value1, - orte_nodeid_t *value2, - orte_data_type_t type) -{ - /** if either value is WILDCARD, then return equal */ - if (*value1 == ORTE_NODEID_WILDCARD || - *value2 == ORTE_NODEID_WILDCARD) return ORTE_EQUAL; - - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_copy_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_copy_fns.c deleted file mode 100755 index e7e829a959..0000000000 --- a/orte/mca/ns/base/data_type_support/ns_data_type_copy_fns.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/ns_private.h" - -/* - * VPID - */ -int orte_ns_base_copy_vpid(orte_vpid_t **dest, orte_vpid_t *src, orte_data_type_t type) -{ - orte_vpid_t *val; - - val = (orte_vpid_t*)malloc(sizeof(orte_vpid_t)); - if (NULL == val) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - *val = *src; - *dest = val; - - return ORTE_SUCCESS; -} - -/* - * NODEID - */ -int orte_ns_base_copy_nodeid(orte_nodeid_t **dest, orte_nodeid_t *src, orte_data_type_t type) -{ - orte_nodeid_t *val; - - val = (orte_nodeid_t*)malloc(sizeof(orte_nodeid_t)); - if (NULL == val) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - *val = *src; - *dest = val; - - return ORTE_SUCCESS; -} - -/* - * JOBID - */ -int orte_ns_base_copy_jobid(orte_jobid_t **dest, orte_jobid_t *src, orte_data_type_t type) -{ - orte_jobid_t *val; - - val = (orte_jobid_t*)malloc(sizeof(orte_jobid_t)); - if (NULL == val) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - *val = *src; - *dest = val; - - return ORTE_SUCCESS; -} - -/* COPY FUNCTIONS FOR COMPLEX TYPES */ - -/* PROCESS NAME */ -int orte_ns_base_copy_name(orte_process_name_t **dest, orte_process_name_t *src, orte_data_type_t type) -{ - orte_process_name_t *val; - - val = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - if (NULL == val) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - val->jobid = src->jobid; - val->vpid = src->vpid; - - *dest = val; - return ORTE_SUCCESS; -} diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_packing_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_packing_fns.c deleted file mode 100644 index c69301c356..0000000000 --- 
a/orte/mca/ns/base/data_type_support/ns_data_type_packing_fns.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss_internal.h" - -#include "orte/mca/ns/base/ns_private.h" - -/* - * NAME - */ -int orte_ns_base_pack_name(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int rc; - orte_std_cntr_t i; - orte_process_name_t* proc; - orte_jobid_t *jobid; - orte_vpid_t *vpid; - - /* collect all the jobids in a contiguous array */ - jobid = (orte_jobid_t*)malloc(num_vals * sizeof(orte_jobid_t)); - if (NULL == jobid) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - proc = (orte_process_name_t*)src; - for (i=0; i < num_vals; i++) { - jobid[i] = proc->jobid; - proc++; - } - /* now pack them in one shot */ - if (ORTE_SUCCESS != (rc = - orte_ns_base_pack_jobid(buffer, jobid, num_vals, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - free(jobid); - return rc; - } - free(jobid); - - /* collect all the vpids in a contiguous array */ - vpid = (orte_vpid_t*)malloc(num_vals * sizeof(orte_vpid_t)); - if (NULL == vpid) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - proc = (orte_process_name_t*)src; - for (i=0; i < num_vals; i++) { - 
vpid[i] = proc->vpid; - proc++; - } - /* now pack them in one shot */ - if (ORTE_SUCCESS != (rc = - orte_ns_base_pack_vpid(buffer, vpid, num_vals, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - free(vpid); - return rc; - } - free(vpid); - - return ORTE_SUCCESS; -} - -/* - * NODEID - */ -int orte_ns_base_pack_nodeid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_NODEID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * JOBID - */ -int orte_ns_base_pack_jobid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_JOBID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * VPID - */ -int orte_ns_base_pack_vpid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_VPID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_print_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_print_fns.c deleted file mode 100755 index 13d1c5082d..0000000000 --- a/orte/mca/ns/base/data_type_support/ns_data_type_print_fns.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/ns_private.h" - -static void orte_ns_base_quick_print(char **output, char *type_name, char *pfx, void *src, size_t src_size); - -/* - * STANDARD PRINT FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ -int orte_ns_base_std_print(char **output, char *prefix, void *src, orte_data_type_t type) -{ - /* set default result */ - *output = NULL; - - switch(type) { - case ORTE_VPID: - orte_ns_base_quick_print(output, "ORTE_VPID", prefix, src, sizeof(orte_vpid_t)); - break; - - case ORTE_JOBID: - orte_ns_base_quick_print(output, "ORTE_JOBID", prefix, src, sizeof(orte_jobid_t)); - break; - - case ORTE_NODEID: - orte_ns_base_quick_print(output, "ORTE_NODEID", prefix, src, sizeof(orte_nodeid_t)); - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - return ORTE_SUCCESS; -} - -/* - * NAME - */ -int orte_ns_base_print_name(char **output, char *prefix, orte_process_name_t *name, orte_data_type_t type) -{ - /* set default result */ - *output = NULL; - - if (NULL == name) { - asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: NULL", - (NULL == prefix ? " " : prefix)); - } else { - asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%ld,%ld]", - (NULL == prefix ? 
" " : prefix), - (long)name->jobid, (long)name->vpid); - } - - return ORTE_SUCCESS; -} - - -static void orte_ns_base_quick_print(char **output, char *type_name, char *prefix, void *src, size_t src_size) -{ - int8_t *i8; - int16_t *i16; - int32_t *i32; - int64_t *i64; - char *pfx; - - /* set default result */ - *output = NULL; - - /* protect against NULL pfx */ - if (NULL == prefix) { - asprintf(&pfx, " "); - } else { - asprintf(&pfx, "%s", prefix); - } - - - switch(src_size) { - case 1: - i8 = (int8_t*)src; - asprintf(output, "%sData type: %s\tValue: %d", pfx, type_name, (int) *i8); - break; - - case 2: - i16 = (int16_t*)src; - asprintf(output, "%sData type: %s\tValue: %d", pfx, type_name, (int) *i16); - break; - - case 4: - i32 = (int32_t*)src; - asprintf(output, "%sData type: %s\tValue: %ld", pfx, type_name, (long) *i32); - break; - - case 8: - i64 = (int64_t*)src; - asprintf(output, "%sData type: %s\tValue: %ld", pfx, type_name, (long) *i64); - break; - - default: - return; - } - - free(pfx); - return; -} diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_size_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_size_fns.c deleted file mode 100755 index fbe7bb218a..0000000000 --- a/orte/mca/ns/base/data_type_support/ns_data_type_size_fns.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/ns_private.h" - -/* - * STANDARD SIZE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ -int orte_ns_base_std_size(size_t *size, void *src, orte_data_type_t type) -{ - switch(type) { - case ORTE_VPID: - *size = sizeof(orte_vpid_t); - break; - - case ORTE_JOBID: - *size = sizeof(orte_jobid_t); - break; - - case ORTE_NODEID: - *size = sizeof(orte_nodeid_t); - break; - - case ORTE_NAME: - *size = sizeof(orte_process_name_t); - break; - - default: - ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); - return ORTE_ERR_UNKNOWN_DATA_TYPE; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_unpacking_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_unpacking_fns.c deleted file mode 100644 index b3487ade14..0000000000 --- a/orte/mca/ns/base/data_type_support/ns_data_type_unpacking_fns.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss_internal.h" - -#include "orte/mca/ns/base/ns_private.h" - -/* - * NAME - */ -int orte_ns_base_unpack_name(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int rc; - orte_std_cntr_t i, num; - orte_process_name_t* proc; - orte_jobid_t *jobid; - orte_vpid_t *vpid; - - num = *num_vals; - - /* allocate space for all the jobids in a contiguous array */ - jobid = (orte_jobid_t*)malloc(num * sizeof(orte_jobid_t)); - if (NULL == jobid) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - *num_vals = 0; - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* now unpack them in one shot */ - if (ORTE_SUCCESS != (rc = - orte_ns_base_unpack_jobid(buffer, jobid, num_vals, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - *num_vals = 0; - free(jobid); - return rc; - } - - /* collect all the vpids in a contiguous array */ - vpid = (orte_vpid_t*)malloc(num * sizeof(orte_vpid_t)); - if (NULL == vpid) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - *num_vals = 0; - free(jobid); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* now unpack them in one shot */ - if (ORTE_SUCCESS != (rc = - orte_ns_base_unpack_vpid(buffer, vpid, num_vals, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - *num_vals = 0; - free(vpid); - free(jobid); - return rc; - } - - /* build the names from the jobid/vpid arrays */ - proc = (orte_process_name_t*)dest; - for (i=0; i < num; i++) { - proc->jobid = jobid[i]; - proc->vpid = vpid[i]; - proc++; - } - - /* cleanup */ - free(vpid); - free(jobid); - - return ORTE_SUCCESS; -} - -/* - * NODEID - */ -int orte_ns_base_unpack_nodeid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, 
num_vals, ORTE_NODEID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * JOBID - */ -int orte_ns_base_unpack_jobid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_JOBID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -/* - * VPID - */ -int orte_ns_base_unpack_vpid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_VPID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - diff --git a/orte/mca/ns/base/ns_base_diag_fns.c b/orte/mca/ns/base/ns_base_diag_fns.c deleted file mode 100644 index d31661ba10..0000000000 --- a/orte/mca/ns/base/ns_base_diag_fns.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include -#include -#include -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/printf.h" -#include "opal/mca/mca.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/base.h" -#include "orte/mca/ns/base/ns_private.h" - -/* - * "not available" functions - */ - -int -orte_ns_base_dump_jobs_not_available(void) -{ - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int -orte_ns_base_dump_tags_not_available(void) -{ - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int -orte_ns_base_dump_datatypes_not_available(void) -{ - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -/**** DIAGNOSTIC FUNCTIONS ****/ -int orte_ns_base_print_dump(orte_buffer_t *buffer) -{ - char *line; - orte_std_cntr_t n; - - n = 1; - while (ORTE_SUCCESS == orte_dss.unpack(buffer, &line, &n, ORTE_STRING)) { - opal_output(mca_ns_base_output, "%s", line); - free(line); - n=1; - } - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/ns/base/ns_base_general_fns.c b/orte/mca/ns/base/ns_base_general_fns.c deleted file mode 100644 index f0aa4cd605..0000000000 --- a/orte/mca/ns/base/ns_base_general_fns.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include -#include -#include -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/printf.h" -#include "opal/mca/mca.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/ns_private.h" - -/* - * "not available" functions - */ -int -orte_ns_base_module_init_not_available(void) -{ - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int -orte_ns_base_assign_rml_tag_not_available(orte_rml_tag_t *tag, char *name) -{ - *tag = ORTE_RML_TAG_MAX; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int -orte_ns_base_define_data_type_not_available( - const char *name, - orte_data_type_t *type) -{ - *type = ORTE_DSS_ID_MAX; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -/**** GET PEERS ****/ -int -orte_ns_base_get_peers_not_available(orte_process_name_t **procs, - orte_std_cntr_t *num_procs, opal_list_t *attributes) -{ - *procs = NULL; - *num_procs = 0; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - - -/**** FT Event ****/ -int -orte_ns_base_ft_event_not_available(int state) -{ - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} diff --git a/orte/mca/ns/base/ns_base_job_fns.c b/orte/mca/ns/base/ns_base_job_fns.c deleted file mode 100644 index 3b67b015a9..0000000000 --- a/orte/mca/ns/base/ns_base_job_fns.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include -#include -#include -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/printf.h" -#include "opal/mca/mca.h" - -#include "orte/mca/schema/schema_types.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/ns_private.h" - -/* - * "not available" functions - */ -int -orte_ns_base_create_jobid_not_available(orte_jobid_t *jobid, opal_list_t *attrs) -{ - *jobid = ORTE_JOBID_INVALID; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int -orte_ns_base_reserve_range_not_available(orte_jobid_t job, - orte_vpid_t range, - orte_vpid_t *startvpid) -{ - *startvpid = ORTE_VPID_INVALID; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int -orte_ns_base_get_vpid_range_not_available(orte_jobid_t job, - orte_vpid_t *range) -{ - *range = ORTE_VPID_INVALID; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int orte_ns_base_get_job_descendants_not_available(orte_jobid_t** descendants, - orte_std_cntr_t *num_desc, - orte_jobid_t job) -{ - *descendants = NULL; - *num_desc = 0; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int orte_ns_base_get_job_children_not_available(orte_jobid_t** children, - orte_std_cntr_t *num_childs, - orte_jobid_t job) -{ - *children = NULL; - *num_childs = 0; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int orte_ns_base_get_root_job_not_available(orte_jobid_t *root_job, orte_jobid_t job) -{ - *root_job = ORTE_JOBID_INVALID; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int orte_ns_base_get_parent_job_not_available(orte_jobid_t *parent, orte_jobid_t job) -{ - *parent = ORTE_JOBID_INVALID; - 
ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int orte_ns_base_get_job_family_not_available(orte_jobid_t** family, - orte_std_cntr_t *num_members, - orte_jobid_t job) -{ - *family = NULL; - *num_members = 0; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - - -/**** JOB STRING FUNCTIONS ****/ -int orte_ns_base_get_jobid_string(char **jobid_string, const orte_process_name_t* name) -{ - if (NULL == name) { /* got an error */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *jobid_string = NULL; - return ORTE_ERR_BAD_PARAM; - } - - /* check for wildcard value - handle appropriately */ - if (ORTE_JOBID_WILDCARD == name->jobid) { - *jobid_string = strdup(ORTE_SCHEMA_WILDCARD_STRING); - return ORTE_SUCCESS; - } - - if (0 > asprintf(jobid_string, "%ld", (long) name->jobid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - return ORTE_SUCCESS; -} - - -int orte_ns_base_convert_jobid_to_string(char **jobid_string, const orte_jobid_t jobid) -{ - /* check for wildcard value - handle appropriately */ - if (ORTE_JOBID_WILDCARD == jobid) { - *jobid_string = strdup(ORTE_SCHEMA_WILDCARD_STRING); - return ORTE_SUCCESS; - } - - if (0 > asprintf(jobid_string, "%ld", (long) jobid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - return ORTE_SUCCESS; -} - - -int orte_ns_base_convert_string_to_jobid(orte_jobid_t *jobid, const char* jobidstring) -{ - long int tmpint; - - if (NULL == jobidstring) { /* got an error */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *jobid = ORTE_JOBID_INVALID; - return ORTE_ERR_BAD_PARAM; - } - - tmpint = strtoul(jobidstring, NULL, 10); - - /* check for invalid value */ - if (ORTE_JOBID_INVALID == tmpint) { - *jobid = ORTE_JOBID_INVALID; - return ORTE_SUCCESS; - } - - if (ORTE_JOBID_MAX >= tmpint && ORTE_JOBID_MIN <= tmpint) { - *jobid = (orte_jobid_t)tmpint; - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *jobid = ORTE_JOBID_INVALID; - return 
ORTE_ERR_BAD_PARAM; - } - - return ORTE_SUCCESS; -} - - diff --git a/orte/mca/ns/base/ns_base_node_fns.c b/orte/mca/ns/base/ns_base_node_fns.c deleted file mode 100644 index 7a4ca13791..0000000000 --- a/orte/mca/ns/base/ns_base_node_fns.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include -#include -#include -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/printf.h" -#include "opal/mca/mca.h" - -#include "orte/mca/schema/schema_types.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/ns_private.h" - -/* - * "not available" functions - */ -int -orte_ns_base_create_nodeids_not_available(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodename) -{ - *nodeids = NULL; - *nnodes = 0; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int -orte_ns_base_get_node_info_not_available(char ***nodenames, orte_std_cntr_t num_nodeids, orte_nodeid_t *nodeids) -{ - *nodenames = NULL; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - - -/**** NODEID STRING FUNCTIONS ****/ -int orte_ns_base_convert_nodeid_to_string(char **string, const orte_nodeid_t nodeid) -{ - *string = NULL; - - /* check for wildcard value - handle appropriately */ - if (ORTE_NODEID_WILDCARD == nodeid) { - *string = 
strdup(ORTE_SCHEMA_WILDCARD_STRING); - return ORTE_SUCCESS; - } - - /* check for invalid value - handle appropriately */ - if (ORTE_NODEID_INVALID == nodeid) { - *string = strdup(ORTE_SCHEMA_INVALID_STRING); - return ORTE_SUCCESS; - } - - if (0 > asprintf(string, "%ld", (long)nodeid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - return ORTE_SUCCESS; -} - - -int orte_ns_base_convert_string_to_nodeid(orte_nodeid_t *nodeid, const char* string) -{ - long int tmpint; - - if (NULL == string) { /* got an error */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *nodeid = ORTE_NODEID_INVALID; - return ORTE_ERR_BAD_PARAM; - } - - /** check for wildcard character - handle appropriately */ - if (0 == strcmp(ORTE_SCHEMA_WILDCARD_STRING, string)) { - *nodeid = ORTE_NODEID_WILDCARD; - return ORTE_SUCCESS; - } - - /* check for invalid value */ - if (0 == strcmp(ORTE_SCHEMA_INVALID_STRING, string)) { - *nodeid = ORTE_NODEID_INVALID; - return ORTE_SUCCESS; - } - - tmpint = strtol(string, NULL, 10); - - if (ORTE_NODEID_MAX >= tmpint && ORTE_NODEID_MIN <= tmpint) { - *nodeid = (orte_nodeid_t)tmpint; - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *nodeid = ORTE_NODEID_INVALID; - return ORTE_ERR_BAD_PARAM; - } - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/ns/base/ns_base_open.c b/orte/mca/ns/base/ns_base_open.c deleted file mode 100644 index e63f086282..0000000000 --- a/orte/mca/ns/base/ns_base_open.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" -#include "orte/mca/errmgr/errmgr.h" -#include "opal/util/output.h" - -#include "orte/dss/dss.h" - -#include "orte/mca/ns/base/base.h" -#include "orte/mca/ns/base/ns_private.h" - - -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. - */ - -#include "orte/mca/ns/base/static-components.h" - -/* - * globals - */ - -orte_process_name_t orte_ns_name_wildcard = {ORTE_JOBID_WILDCARD, ORTE_VPID_WILDCARD}; -orte_process_name_t orte_ns_name_invalid = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}; -orte_process_name_t orte_ns_name_my_hnp = {0, 0}; - -/* - * Global variables - */ -int mca_ns_base_output = -1; -mca_ns_base_module_t orte_ns = { - /* init */ - orte_ns_base_module_init_not_available, - /* node functions */ - orte_ns_base_create_nodeids_not_available, - orte_ns_base_get_node_info_not_available, - orte_ns_base_convert_nodeid_to_string, - orte_ns_base_convert_string_to_nodeid, - /* jobid functions */ - orte_ns_base_create_jobid_not_available, - orte_ns_base_get_job_descendants_not_available, - orte_ns_base_get_job_children_not_available, - orte_ns_base_get_root_job_not_available, - orte_ns_base_get_parent_job_not_available, - orte_ns_base_get_job_family_not_available, - orte_ns_base_get_jobid_string, - orte_ns_base_convert_jobid_to_string, - orte_ns_base_convert_string_to_jobid, - orte_ns_base_reserve_range_not_available, - orte_ns_base_get_vpid_range_not_available, - /* vpid functions */ - orte_ns_base_get_vpid_string, - orte_ns_base_convert_vpid_to_string, - orte_ns_base_convert_string_to_vpid, - /* name functions */ - orte_ns_base_create_process_name, - 
orte_ns_base_create_my_name_not_available, - orte_ns_base_convert_string_to_process_name, - orte_ns_base_get_proc_name_string, - orte_ns_base_compare_fields, - /* peer functions */ - orte_ns_base_get_peers_not_available, - /* tag server functions */ - orte_ns_base_assign_rml_tag_not_available, - /* data type functions */ - orte_ns_base_define_data_type_not_available, - /* diagnostic functions */ - orte_ns_base_dump_jobs_not_available, - orte_ns_base_dump_tags_not_available, - orte_ns_base_dump_datatypes_not_available, - - orte_ns_base_ft_event_not_available -}; - -bool mca_ns_base_selected = false; -opal_list_t mca_ns_base_components_available; -mca_ns_base_component_t mca_ns_base_selected_component; - - -/* constructor - used to initialize namelist instance */ -static void orte_namelist_construct(orte_namelist_t* list) -{ - list->name = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_namelist_destructor(orte_namelist_t* list) -{ - if (NULL != list->name) { - free(list->name); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_namelist_t, /* type name */ - opal_list_item_t, /* parent "class" name */ - orte_namelist_construct, /* constructor */ - orte_namelist_destructor); /* destructor */ - - - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -int orte_ns_base_open(void) -{ - int param, value, rc; - orte_data_type_t tmp; - opal_output_stream_t kill_prefix; - - /* Debugging / verbose output */ - /** setup the structure to kill the blasted prefix that opal_output - * now defaults to including so the output can be legible again! 
- */ - OBJ_CONSTRUCT(&kill_prefix, opal_output_stream_t); - kill_prefix.lds_want_stderr = true; - kill_prefix.lds_prefix = NULL; - - param = mca_base_param_reg_int_name("ns", "base_verbose", - "Verbosity level for the ns framework", - false, false, 0, &value); - if (value != 0) { - kill_prefix.lds_verbose_level = value; - } - mca_ns_base_output = opal_output_open(&kill_prefix); - - /* setup the print_args function */ - if (ORTE_SUCCESS != (rc = orte_ns_base_init_print_args())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* register the base system types with the DPS */ - tmp = ORTE_NAME; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_ns_base_pack_name, - orte_ns_base_unpack_name, - (orte_dss_copy_fn_t)orte_ns_base_copy_name, - (orte_dss_compare_fn_t)orte_ns_base_compare_name, - (orte_dss_size_fn_t)orte_ns_base_std_size, - (orte_dss_print_fn_t)orte_ns_base_print_name, - (orte_dss_release_fn_t)orte_ns_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_NAME", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_VPID; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_ns_base_pack_vpid, - orte_ns_base_unpack_vpid, - (orte_dss_copy_fn_t)orte_ns_base_copy_vpid, - (orte_dss_compare_fn_t)orte_ns_base_compare_vpid, - (orte_dss_size_fn_t)orte_ns_base_std_size, - (orte_dss_print_fn_t)orte_ns_base_std_print, - (orte_dss_release_fn_t)orte_ns_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_VPID", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_JOBID; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_ns_base_pack_jobid, - orte_ns_base_unpack_jobid, - (orte_dss_copy_fn_t)orte_ns_base_copy_jobid, - (orte_dss_compare_fn_t)orte_ns_base_compare_jobid, - (orte_dss_size_fn_t)orte_ns_base_std_size, - (orte_dss_print_fn_t)orte_ns_base_std_print, - (orte_dss_release_fn_t)orte_ns_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_JOBID", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tmp = ORTE_NODEID; - if (ORTE_SUCCESS != (rc = 
orte_dss.register_type(orte_ns_base_pack_nodeid, - orte_ns_base_unpack_nodeid, - (orte_dss_copy_fn_t)orte_ns_base_copy_nodeid, - (orte_dss_compare_fn_t)orte_ns_base_compare_nodeid, - (orte_dss_size_fn_t)orte_ns_base_std_size, - (orte_dss_print_fn_t)orte_ns_base_std_print, - (orte_dss_release_fn_t)orte_ns_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_NODEID", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* Open up all available components */ - - if (ORTE_SUCCESS != - mca_base_components_open("ns", mca_ns_base_output, - mca_ns_base_static_components, - &mca_ns_base_components_available, true)) { - return ORTE_ERROR; - } - - /* All done */ - - return ORTE_SUCCESS; -} diff --git a/orte/mca/ns/base/ns_base_print_name_args.c b/orte/mca/ns/base/ns_base_print_name_args.c deleted file mode 100644 index 202a24f5ba..0000000000 --- a/orte/mca/ns/base/ns_base_print_name_args.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include -#include - -#include "opal/util/output.h" -#include "opal/util/printf.h" -#include "opal/threads/tsd.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/ns/base/base.h" - -#define ORTE_PRINT_NAME_ARGS_MAX_SIZE 20 -#define ORTE_PRINT_NAME_ARG_NUM_BUFS 8 - -static opal_tsd_key_t print_args_tsd_key; -char* orte_print_args_null = "NULL"; -typedef struct { - char *buffers[ORTE_PRINT_NAME_ARG_NUM_BUFS]; - int cntr; -} orte_print_args_buffers_t; - -static void -buffer_cleanup(void *value) -{ - int i; - orte_print_args_buffers_t *ptr; - - if (NULL != value) { - ptr = (orte_print_args_buffers_t*)value; - for (i=0; i < ORTE_PRINT_NAME_ARG_NUM_BUFS; i++) { - free(ptr->buffers[i]); - } - } -} - -static orte_print_args_buffers_t* -get_print_name_buffer(void) -{ - orte_print_args_buffers_t *ptr; - int ret, i; - - ret = opal_tsd_getspecific(print_args_tsd_key, (void**)&ptr); - if (OPAL_SUCCESS != ret) return NULL; - - if (NULL == ptr) { - ptr = (orte_print_args_buffers_t*)malloc(sizeof(orte_print_args_buffers_t)); - for (i=0; i < ORTE_PRINT_NAME_ARG_NUM_BUFS; i++) { - ptr->buffers[i] = (char *) malloc((ORTE_PRINT_NAME_ARGS_MAX_SIZE+1) * sizeof(char)); - } - ptr->cntr = 0; - ret = opal_tsd_setspecific(print_args_tsd_key, (void*)ptr); - } - - return (orte_print_args_buffers_t*) ptr; -} - -char* orte_ns_base_print_name_args(const orte_process_name_t *name) -{ - orte_print_args_buffers_t *ptr = get_print_name_buffer(); - - if (NULL == ptr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return orte_print_args_null; - } - - /* cycle around the ring */ - if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { - ptr->cntr = 0; - } - - if (NULL == name) { - snprintf(ptr->buffers[ptr->cntr++], ORTE_PRINT_NAME_ARGS_MAX_SIZE, "[NO-NAME]"); - } else { - snprintf(ptr->buffers[ptr->cntr++], - 
ORTE_PRINT_NAME_ARGS_MAX_SIZE, - "[%ld,%ld]", ORTE_NAME_ARGS(name)); - } - return ptr->buffers[ptr->cntr-1]; -} - -int -orte_ns_base_init_print_args(void) -{ - return opal_tsd_key_create(&print_args_tsd_key, buffer_cleanup); -} diff --git a/orte/mca/ns/base/ns_base_select.c b/orte/mca/ns/base/ns_base_select.c deleted file mode 100644 index fac7c4ceff..0000000000 --- a/orte/mca/ns/base/ns_base_select.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "orte/mca/ns/base/base.h" - - -/** - * Function for selecting one component from all those that are - * available. 
- */ -int orte_ns_base_select(void) -{ - opal_list_item_t *item; - mca_base_component_list_item_t *cli; - mca_ns_base_component_t *component, *best_component = NULL; - mca_ns_base_module_t *module, *best_module = NULL; - int priority, best_priority = -1; - - /* Iterate through all the available components */ - - for (item = opal_list_get_first(&mca_ns_base_components_available); - item != opal_list_get_end(&mca_ns_base_components_available); - item = opal_list_get_next(item)) { - cli = (mca_base_component_list_item_t *) item; - component = (mca_ns_base_component_t *) cli->cli_component; - - /* Call the component's init function and see if it wants to be - selected */ - - module = component->ns_init(&priority); - - /* If we got a non-NULL module back, then the component wants to - be selected. So save its multi/hidden values and save the - module with the highest priority */ - - if (NULL != module) { - /* If this is the best one, save it */ - - if (priority > best_priority) { - - /* If there was a previous best one, finalize */ - - if (NULL != best_component) { - best_component->ns_finalize(); - } - - /* Save the new best one */ - - best_module = module; - best_component = component; - - /* update the best priority */ - best_priority = priority; - } - - /* If it's not the best one, finalize it */ - - else { - component->ns_finalize(); - } - } - } - - /* If we didn't find one to select, barf */ - - if (NULL == best_component) { - return ORTE_ERROR; - } - - /* We have happiness -- save the component and module for later - usage */ - - orte_ns = *best_module; - mca_ns_base_selected_component = *best_component; - mca_ns_base_selected = true; - - /* all done */ - - return ORTE_SUCCESS; -} diff --git a/orte/mca/ns/base/ns_base_vpid_name_fns.c b/orte/mca/ns/base/ns_base_vpid_name_fns.c deleted file mode 100644 index 59df9bb29a..0000000000 --- a/orte/mca/ns/base/ns_base_vpid_name_fns.c +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana 
University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include -#include -#include -#include -#if HAVE_NETINET_IN_H -#include -#endif - -#include "orte/orte_constants.h" - -#include "opal/util/output.h" -#include "opal/util/printf.h" -#include "opal/mca/mca.h" - -#include "orte/mca/schema/schema_types.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/ns_private.h" - -/* - * "not available" functions - */ -int -orte_ns_base_create_my_name_not_available(void) -{ - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -/**** NAME STRING FUNCTIONS ****/ - -int orte_ns_base_get_proc_name_string(char **name_string, - const orte_process_name_t* name) -{ - char *tmp; - - if (NULL == name) { /* got an error */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* check for wildcard and invalid values - where encountered, insert the - * corresponding string so we can correctly parse the name string when - * it is passed back to us later - */ - if (ORTE_JOBID_WILDCARD == name->jobid) { - asprintf(&tmp, "%s", ORTE_SCHEMA_WILDCARD_STRING); - } else if (ORTE_JOBID_INVALID == name->jobid) { - asprintf(&tmp, "%s", ORTE_SCHEMA_INVALID_STRING); - } else { - asprintf(&tmp, "%ld", (long)name->jobid); - } - - if (ORTE_VPID_WILDCARD == name->vpid) { - asprintf(name_string, "%s%c%s", tmp, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_WILDCARD_STRING); - } else if (ORTE_VPID_INVALID == name->vpid) { - 
asprintf(name_string, "%s%c%s", tmp, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_INVALID_STRING); - } else { - asprintf(name_string, "%s%c%ld", tmp, ORTE_SCHEMA_DELIMITER_CHAR, (long)name->vpid); - } - free(tmp); - - return ORTE_SUCCESS; -} - -int orte_ns_base_convert_string_to_process_name(orte_process_name_t **name, - const char* name_string) -{ - char *temp, *token; - orte_jobid_t job; - orte_vpid_t vpid; - long int tmpint; - int return_code=ORTE_SUCCESS; - - /* check for NULL string - error */ - if (NULL == name_string) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - temp = strdup(name_string); /** copy input string as the strtok process is destructive */ - token = strtok(temp, ORTE_SCHEMA_DELIMITER_STRING); /** get first field -> jobid */ - - /* check for error */ - if (NULL == token) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /** convert to largest possible int - then - * check to ensure it is within range of jobid_t before casting */ - - /** first, though, check for WILDCARD character - assign - * value accordingly, if found - */ - if (0 == strcmp(token, ORTE_SCHEMA_WILDCARD_STRING)) { - job = ORTE_JOBID_WILDCARD; - } else if (0 == strcmp(token, ORTE_SCHEMA_INVALID_STRING)) { - job = ORTE_JOBID_INVALID; - } else { - tmpint = strtol(token, NULL, 10); - if (ORTE_JOBID_MAX >= tmpint && ORTE_JOBID_MIN <= tmpint) { - job = (orte_jobid_t)tmpint; - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return_code = ORTE_ERR_BAD_PARAM; - goto CLEANUP; - } - } - - token = strtok(NULL, ORTE_SCHEMA_DELIMITER_STRING); /** get next field -> vpid */ - - /* check for error */ - if (NULL == token) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /** convert to largest possible int then - * check to ensure it is within range of vpid_t before casting */ - - /** first, though, check for WILDCARD character - assign - * value accordingly, if found - */ - if (0 == strcmp(token, 
ORTE_SCHEMA_WILDCARD_STRING)) { - vpid = ORTE_VPID_WILDCARD; - } else if (0 == strcmp(token, ORTE_SCHEMA_INVALID_STRING)) { - vpid = ORTE_VPID_INVALID; - } else { - tmpint = strtol(token, NULL, 10); - if (ORTE_VPID_MAX >= tmpint && ORTE_VPID_MIN <= tmpint) { - vpid = (orte_vpid_t)tmpint; - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return_code = ORTE_ERR_BAD_PARAM; - goto CLEANUP; - } - } - - if (ORTE_SUCCESS != (return_code = - orte_ns_base_create_process_name(name, job, vpid))) { - ORTE_ERROR_LOG(return_code); - } - -CLEANUP: - free(temp); - - return return_code; -} - -/**** CREATE PROCESS NAME ****/ -int orte_ns_base_create_process_name(orte_process_name_t **name, - orte_jobid_t job, - orte_vpid_t vpid) -{ - *name = NULL; - - *name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - if (NULL == *name) { /* got an error */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - (*name)->jobid = job; - (*name)->vpid = vpid; - return ORTE_SUCCESS; -} - - -/**** VPID STRING FUNCTIONS ****/ -int orte_ns_base_get_vpid_string(char **vpid_string, const orte_process_name_t* name) -{ - if (NULL == name) { /* got an error */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *vpid_string = NULL; - return ORTE_ERR_BAD_PARAM; - } - - /* check for wildcard value - handle appropriately */ - if (ORTE_VPID_WILDCARD == name->vpid) { - *vpid_string = strdup(ORTE_SCHEMA_WILDCARD_STRING); - return ORTE_SUCCESS; - } - - /* check for invalid value - handle appropriately */ - if (ORTE_VPID_INVALID == name->vpid) { - *vpid_string = strdup(ORTE_SCHEMA_INVALID_STRING); - return ORTE_SUCCESS; - } - - if (0 > asprintf(vpid_string, "%ld", (long) name->vpid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - return ORTE_SUCCESS; -} - - -int orte_ns_base_convert_vpid_to_string(char **vpid_string, const orte_vpid_t vpid) -{ - /* check for wildcard value - handle appropriately */ - if (ORTE_VPID_WILDCARD == vpid) { - 
*vpid_string = strdup(ORTE_SCHEMA_WILDCARD_STRING); - return ORTE_SUCCESS; - } - - /* check for invalid value - handle appropriately */ - if (ORTE_VPID_INVALID == vpid) { - *vpid_string = strdup(ORTE_SCHEMA_INVALID_STRING); - return ORTE_SUCCESS; - } - - if (0 > asprintf(vpid_string, "%ld", (long) vpid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - return ORTE_SUCCESS; -} - - -int orte_ns_base_convert_string_to_vpid(orte_vpid_t *vpid, const char* vpidstring) -{ - long int tmpint; - - if (NULL == vpidstring) { /* got an error */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *vpid = ORTE_VPID_INVALID; - return ORTE_ERR_BAD_PARAM; - } - - /** check for wildcard character - handle appropriately */ - if (0 == strcmp(ORTE_SCHEMA_WILDCARD_STRING, vpidstring)) { - *vpid = ORTE_VPID_WILDCARD; - return ORTE_SUCCESS; - } - - /* check for invalid value */ - if (0 == strcmp(ORTE_SCHEMA_INVALID_STRING, vpidstring)) { - *vpid = ORTE_VPID_INVALID; - return ORTE_SUCCESS; - } - - tmpint = strtol(vpidstring, NULL, 10); - - if (ORTE_VPID_MAX >= tmpint && ORTE_VPID_MIN <= tmpint) { - *vpid = (orte_vpid_t)tmpint; - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *vpid = ORTE_VPID_INVALID; - return ORTE_ERR_BAD_PARAM; - } - - return ORTE_SUCCESS; -} - -/**** COMPARE NAME FIELDS ****/ -int orte_ns_base_compare_fields(orte_ns_cmp_bitmask_t fields, - const orte_process_name_t* name1, - const orte_process_name_t* name2) -{ - /* handle the NULL pointer case */ - if (NULL == name1 && NULL == name2) { - return ORTE_EQUAL; - } else if (NULL == name1) { - return ORTE_VALUE2_GREATER; - } else if (NULL == name2) { - return ORTE_VALUE1_GREATER; - } - - /* in this comparison function, we check for exact equalities. 
- * In the case of wildcards, we check to ensure that the fields - * actually match those values - thus, a "wildcard" in this - * function does not actually stand for a wildcard value, but - * rather a specific value - */ - - /* check job id */ - - if (ORTE_NS_CMP_JOBID & fields) { - if (name1->jobid < name2->jobid) { - return ORTE_VALUE2_GREATER; - } else if (name1->jobid > name2->jobid) { - return ORTE_VALUE1_GREATER; - } - } - - /* get here if jobid's are equal, or not being checked - * now check vpid - */ - - if (ORTE_NS_CMP_VPID & fields) { - if (name1->vpid < name2->vpid) { - return ORTE_VALUE2_GREATER; - } else if (name1->vpid > name2->vpid) { - return ORTE_VALUE1_GREATER; - } - } - - /* only way to get here is if all fields are being checked and are equal, - * or jobid not checked, but vpid equal, - * only vpid being checked, and equal - * return that fact - */ - return ORTE_EQUAL; -} diff --git a/orte/mca/ns/base/ns_private.h b/orte/mca/ns/base/ns_private.h deleted file mode 100644 index f1f2eeddf3..0000000000 --- a/orte/mca/ns/base/ns_private.h +++ /dev/null @@ -1,273 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_NS_PRIVATE_H -#define MCA_NS_PRIVATE_H - -/* - * includes - */ -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/mca.h" - -#include "orte/dss/dss_types.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/rml/rml_types.h" - -/* - * Global functions for MCA overall collective open and close - */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* default limits */ -#define ORTE_NS_ARRAY_MAX_SIZE INT_MAX -#define ORTE_NS_ARRAY_BLOCK_SIZE 512 -/* - * Internal definitions - */ -typedef uint8_t orte_ns_cmd_bitmask_t; -typedef uint8_t orte_ns_cmd_flag_t; - -/* - * packing type definitions - */ -/* CAUTION - any changes here must also change corresponding - * typedefs above and in ns_types.h - */ -#define ORTE_NS_CMD ORTE_INT8 - - -#if ORTE_ENABLE_JUMBO_APPS -#define ORTE_JOBID_T ORTE_INT32 -#define ORTE_VPID_T ORTE_INT32 -#define ORTE_NODEID_T ORTE_INT32 - -#else -#define ORTE_JOBID_T ORTE_INT16 -#define ORTE_VPID_T ORTE_INT16 -#define ORTE_NODEID_T ORTE_INT16 -#endif - - -/* - * define flag values for remote commands - only used internally - */ -#define ORTE_NS_CREATE_NODEID_CMD (int8_t) 3 -#define ORTE_NS_GET_NODE_INFO_CMD (int8_t) 4 -#define ORTE_NS_CREATE_JOBID_CMD (int8_t) 5 -#define ORTE_NS_GET_JOB_DESC_CMD (int8_t) 6 -#define ORTE_NS_GET_JOB_CHILD_CMD (int8_t) 7 -#define ORTE_NS_GET_ROOT_JOB_CMD (int8_t) 8 -#define ORTE_NS_GET_PARENT_JOB_CMD (int8_t) 9 -#define ORTE_NS_RESERVE_RANGE_CMD (int8_t) 10 -#define ORTE_NS_ASSIGN_OOB_TAG_CMD (int8_t) 11 -#define ORTE_NS_GET_PEERS_CMD (int8_t) 12 -#define ORTE_NS_DEFINE_DATA_TYPE_CMD (int8_t) 13 - -#define ORTE_NS_DUMP_JOBIDS_CMD (int8_t) 16 -#define ORTE_NS_DUMP_TAGS_CMD (int8_t) 17 -#define ORTE_NS_DUMP_DATATYPES_CMD (int8_t) 18 -#define ORTE_NS_GET_VPID_RANGE_CMD (int8_t) 19 -#define ORTE_NS_GET_JOB_FAMILY_CMD 
(int8_t) 20 - - -/* - * Base functions that are common to all implementations - can be overridden - */ - -ORTE_DECLSPEC int orte_ns_base_create_process_name(orte_process_name_t **name, - orte_jobid_t job, - orte_vpid_t vpid); - -ORTE_DECLSPEC int orte_ns_base_convert_string_to_process_name(orte_process_name_t **name, - const char* name_string); - -ORTE_DECLSPEC int orte_ns_base_get_proc_name_string(char **name_string, - const orte_process_name_t* name); - -ORTE_DECLSPEC int orte_ns_base_get_vpid_string(char **vpid_string, const orte_process_name_t* name); - -ORTE_DECLSPEC int orte_ns_base_convert_vpid_to_string(char **vpid_string, const orte_vpid_t vpid); - -ORTE_DECLSPEC int orte_ns_base_convert_string_to_vpid(orte_vpid_t *vpid, const char* vpidstring); - -ORTE_DECLSPEC int orte_ns_base_get_jobid_string(char **jobid_string, const orte_process_name_t* name); - -ORTE_DECLSPEC int orte_ns_base_convert_jobid_to_string(char **jobid_string, const orte_jobid_t jobid); - -ORTE_DECLSPEC int orte_ns_base_convert_string_to_jobid(orte_jobid_t *jobid, const char* jobidstring); - -ORTE_DECLSPEC int orte_ns_base_get_vpid(orte_vpid_t *vpid, const orte_process_name_t* name); - -ORTE_DECLSPEC int orte_ns_base_get_jobid(orte_jobid_t *jobid, const orte_process_name_t* name); - -ORTE_DECLSPEC int orte_ns_base_convert_string_to_nodeid(orte_nodeid_t *nodeid, const char *string); - -ORTE_DECLSPEC int orte_ns_base_convert_nodeid_to_string(char **nodeid_string, const orte_nodeid_t nodeid); - -ORTE_DECLSPEC int orte_ns_base_compare_fields(orte_ns_cmp_bitmask_t fields, - const orte_process_name_t* name1, - const orte_process_name_t* name2); - -ORTE_DECLSPEC int orte_ns_base_print_dump(orte_buffer_t *buffer); - - -/* not available functions */ -ORTE_DECLSPEC int orte_ns_base_module_init_not_available(void); - -ORTE_DECLSPEC int orte_ns_base_create_nodeids_not_available(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, - char **nodename); - -ORTE_DECLSPEC int 
orte_ns_base_get_node_info_not_available(char ***nodename, - orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); - -ORTE_DECLSPEC int orte_ns_base_create_jobid_not_available(orte_jobid_t *jobid, opal_list_t *attrs); - -ORTE_DECLSPEC int orte_ns_base_get_job_descendants_not_available(orte_jobid_t** descendants, - orte_std_cntr_t *num_desc, - orte_jobid_t job); - -ORTE_DECLSPEC int orte_ns_base_get_job_children_not_available(orte_jobid_t** children, - orte_std_cntr_t *num_childs, - orte_jobid_t job); - -ORTE_DECLSPEC int orte_ns_base_get_root_job_not_available(orte_jobid_t *root_job, orte_jobid_t job); - -ORTE_DECLSPEC int orte_ns_base_get_parent_job_not_available(orte_jobid_t *parent, orte_jobid_t job); - -ORTE_DECLSPEC int orte_ns_base_get_job_family_not_available(orte_jobid_t **family, orte_std_cntr_t *num_members, orte_jobid_t job); - -ORTE_DECLSPEC int orte_ns_base_reserve_range_not_available(orte_jobid_t job, - orte_vpid_t range, - orte_vpid_t *startvpid); - -ORTE_DECLSPEC int orte_ns_base_get_vpid_range_not_available(orte_jobid_t job, - orte_vpid_t *range); - -ORTE_DECLSPEC int orte_ns_base_assign_rml_tag_not_available(orte_rml_tag_t *tag, char *name); - -ORTE_DECLSPEC int orte_ns_base_define_data_type_not_available( - const char *name, - orte_data_type_t *type); - -ORTE_DECLSPEC int orte_ns_base_create_my_name_not_available(void); - -ORTE_DECLSPEC int orte_ns_base_get_peers_not_available(orte_process_name_t **procs, - orte_std_cntr_t *num_procs, opal_list_t *attributes); - -ORTE_DECLSPEC int orte_ns_base_dump_jobs_not_available(void); -ORTE_DECLSPEC int orte_ns_base_dump_tags_not_available(void); -ORTE_DECLSPEC int orte_ns_base_dump_datatypes_not_available(void); - -ORTE_DECLSPEC int orte_ns_base_ft_event_not_available(int state); - - -/* Base functions used everywhere */ -ORTE_DECLSPEC int orte_ns_base_pack_name(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -ORTE_DECLSPEC int 
orte_ns_base_pack_nodeid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -ORTE_DECLSPEC int orte_ns_base_pack_jobid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -ORTE_DECLSPEC int orte_ns_base_pack_vpid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -ORTE_DECLSPEC int orte_ns_base_unpack_name(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -ORTE_DECLSPEC int orte_ns_base_unpack_nodeid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -ORTE_DECLSPEC int orte_ns_base_unpack_jobid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -ORTE_DECLSPEC int orte_ns_base_unpack_vpid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -/* - * copy functions - */ - -int orte_ns_base_copy_name(orte_process_name_t **dest, orte_process_name_t *src, orte_data_type_t type); - -int orte_ns_base_copy_vpid(orte_vpid_t **dest, orte_vpid_t *src, orte_data_type_t type); - -int orte_ns_base_copy_nodeid(orte_nodeid_t **dest, orte_nodeid_t *src, orte_data_type_t type); - -int orte_ns_base_copy_jobid(orte_jobid_t **dest, orte_jobid_t *src, orte_data_type_t type); - -/* - * compare functions - */ - -int orte_ns_base_compare_name(orte_process_name_t *value1, - orte_process_name_t *value2, - orte_data_type_t type); - - -int orte_ns_base_compare_vpid(orte_vpid_t *value1, - orte_vpid_t *value2, - orte_data_type_t type); - -int orte_ns_base_compare_jobid(orte_jobid_t *value1, - orte_jobid_t *value2, - orte_data_type_t type); - -int orte_ns_base_compare_nodeid(orte_nodeid_t *value1, - orte_nodeid_t *value2, - orte_data_type_t type); - -/* - * size functions - */ - -int orte_ns_base_std_size(size_t *size, void *src, orte_data_type_t type); - -/* - * release functions - */ - -void 
orte_ns_base_std_release(orte_data_value_t *value); - -/* - * print functions - */ - -int orte_ns_base_std_print(char **output, char *prefix, void *src, orte_data_type_t type); - -int orte_ns_base_print_name(char **output, char *prefix, orte_process_name_t *name, orte_data_type_t type); - - -/* - * external API functions will be documented in the mca/ns/ns.h file - */ - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/mca/ns/ns.h b/orte/mca/ns/ns.h deleted file mode 100644 index 5941cdb0cd..0000000000 --- a/orte/mca/ns/ns.h +++ /dev/null @@ -1,572 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI Name Server - * - * The Open MPI Name Server provides unique name ranges for processes - * within the universe. Each universe will have one name server - * running within the seed daemon. This is done to prevent the - * inadvertent duplication of names. - */ - -#ifndef MCA_NS_H -#define MCA_NS_H - -/* - * includes - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "orte/dss/dss.h" - -#include "opal/mca/mca.h" -#include "orte/mca/rml/rml.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "ns_types.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - -/* - * Component functions - all MUST be provided! 
- */ - -/* Init the selected module - */ -typedef int (*orte_ns_base_module_init_fn_t)(void); - -/**** NODE FUNCTIONS ****/ -/* - * Get an array of node id's - * Given a NULL-terminated array of names of nodes within it, this function assigns an id to represent - * each node. - */ -typedef int (*orte_ns_base_module_create_nodeids_fn_t)(orte_nodeid_t **nodes, orte_std_cntr_t *nnodes, - char **nodenames); - -/* - * Get node info - * Retrieve the names of an array of nodes given their nodeids. - * - * @param nodeids The ids of the node. - * @param nodenames Returns a pointer to a NULL-terminated array of strdup'd strings containing the node names. - * @retval ORTE_SUCCESS The nodename was created and returned. - * @retval ORTE_ERROR_VALUE An error code indicative of the problem. - */ -typedef int (*orte_ns_base_module_get_node_info_fn_t)(char ***nodename, - orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); - -/* - * Convert nodeid to character string - * Returns the nodeid in a character string representation. The string is created - * by expressing the provided nodeid in decimal. Memory for the string is - * allocated by the function - releasing that allocation is the responsibility of - * the calling program. - * - * @param nodeid The nodeid to be converted. - * - * @param *nodeid_string A pointer to a character string representation of the nodeid. - * @retval ORTE_SUCCESS The string was created and returned. - * @retval ORTE_ERROR_VALUE An error code indicative of the problem. - */ -typedef int (*orte_ns_base_module_convert_nodeid_to_string_fn_t)(char **nodeid_string, const orte_nodeid_t nodeid); - -/* - * Convert a string to a nodeid. - * Converts a characters string into a nodeid. The character string must be a - * decimal representation of a valid nodeid. - * - * @param nodeidstring The string to be converted. - * - * @param nodeid A pointer to a location where the resulting nodeid is to be stored. - * @retval ORTE_SUCCESS The string was created and returned. 
- * @retval ORTE_ERROR_VALUE An error code indicative of the problem. - */ -typedef int (*orte_ns_base_module_convert_string_to_nodeid_fn_t)(orte_nodeid_t *nodeid, const char *nodeidstring); - - -/**** JOB ID FUNCTIONS ****/ -/** - * Create a new job id. - * Allocate a new job id for use by the caller. - * - * The 0 job id is reserved for daemons within the system and will not be allocated. - * Developers should therefore assume that the daemon job id is automatically allocated - * and proceed to request names against it. - * - * @param None - * @param jobid A pointer to the location where the jobid is to be returned. - * @param attrs A list of attributes that describe any conditions to be placed on - * the assigned jobid. For example, specifying USE_PARENT indicates that the specified - * jobid is to be identified as the parent of the new jobid. USE_ROOT indicates that - * the root of the job family of the specified jobid is to be identified as the parent. - */ -typedef int (*orte_ns_base_module_create_jobid_fn_t)(orte_jobid_t *jobid, opal_list_t *attrs); - -/* - * Get job descendants - * Given a jobid, return the array of jobids that descend from this one. - */ -typedef int (*orte_ns_base_module_get_job_descendants_fn_t)(orte_jobid_t** descendants, - orte_std_cntr_t *num_desc, - orte_jobid_t job); - -/* - * Get job children - * Given a jobid, return the array of jobids that are direct children of that job - */ -typedef int (*orte_ns_base_module_get_job_children_fn_t)(orte_jobid_t** children, - orte_std_cntr_t *num_childs, - orte_jobid_t job); - -/* - * Get root job from job family - * Given a jobid, return the jobid at the head of this job's family. If the jobid provided is the - * root for that family, that value will be returned. 
- */ -typedef int (*orte_ns_base_module_get_root_job_fn_t)(orte_jobid_t *root_job, orte_jobid_t job); - -/* - * Get a job family - * Given a jobid, return the array of jobids (including the given one) that are members - * of that extended job family. This will return ALL jobs related to the given one. - */ -typedef int (*orte_ns_base_module_get_job_family_fn_t)(orte_jobid_t** family, - orte_std_cntr_t *num_members, - orte_jobid_t job); - -/* - * Get parent jobid - * Given a jobid, return the parent job from which it descended. If the provided jobid is the - * root (i.e., has no parent), this function will return that same value. - */ -typedef int (*orte_ns_base_module_get_parent_job_fn_t)(orte_jobid_t *parent, orte_jobid_t job); - -/** - * Reserve a range of process id's. - * The reserve_range() function reserves a range of vpid's for the given jobid. - * - * @param jobid The id of the job for which the vpid's are to be reserved. - * @param range The number of vpid's to be reserved. The function will find the - * next available process id and assign range-number of sequential id's to the caller. - * These id's will be reserved - i.e., they cannot be assigned to any subsequent caller. - * - * @retval startid The starting value of the reserved range of vpid's. A value of MCA_NS_BASE_VPID_MAX - * indicates that an error occurred. - * - * @code - * starting_procid = ompi_name_server.reserve_range(jobid, range) - * @endcode - */ -typedef int (*orte_ns_base_module_reserve_range_fn_t)(orte_jobid_t job, - orte_vpid_t range, - orte_vpid_t *startvpid); - -/* - * Get the range of vpids assigned to a specified jobid - * Given a jobid, return the maximum vpid value assigned to that job. - */ -typedef int (*orte_ns_base_module_get_vpid_range_fn_t)(orte_jobid_t job, orte_vpid_t *range); - -/** - * Get the job id as a character string. - * The get_jobid_string() function returns the job id in a character string - * representation. 
The string is created by expressing the field in hexadecimal. Memory - * for the string is allocated by the function - releasing that allocation is the - * responsibility of the calling program. - * - * @param *name A pointer to the name structure containing the name to be - * "translated" to a string. - * - * @retval *name_string A pointer to the character string representation of the - * job id. - * @retval NULL Indicates an error occurred - either no memory could be allocated - * or the caller provided an incorrect name pointer (e.g., NULL). - * - * @code - * jobid-string = ompi_name_server.get_jobid_string(&name) - * @endcode - */ -typedef int (*orte_ns_base_module_get_jobid_string_fn_t)(char **jobid_string, const orte_process_name_t* name); - -/** - * Convert jobid to character string - * The convert_jobid_to_string() function returns the jobid in a character string representation. - * The string is created by expressing the provided jobid in hexadecimal. Memory - * for the string is allocated by the function - releasing that allocation is the - * responsibility of the calling program. - * - * @param jobid The jobid to be converted. - * - * @retval *jobid_string A pointer to a character string representation of the - * jobid. - * @retval NULL Indicates an error occurred - probably no memory could be allocated. - * - * @code - * jobid-string = ompi_name_server.convert_jobid_to_string(jobid); - * @endcode - */ -typedef int (*orte_ns_base_module_convert_jobid_to_string_fn_t)(char **jobid_string, const orte_jobid_t jobid); - -/** - * Convert a string to a jobid - * Converts a character string into a jobid. The character string must be a hexadecimal - * representation of a valid jobid. - * - * @param jobidstring The string to be converted. - * - * @retval jobid The resulting jobid. - * @retval MCA_NS_BASE_JOBID_MAX String could not be converted. 
- * - * @code - * jobid = ompi_name_server.convert_string_to_jobid(jobidstring); - * @endcode - * - */ -typedef int (*orte_ns_base_module_convert_string_to_jobid_fn_t)(orte_jobid_t *jobid, const char* jobidstring); - - - -/**** NAME FUNCTIONS ****/ -/** - * Obtain a single new process name. - * The create_process_name() function creates a single process name structure and fills the - * fields with the provided values. - * - * @param job The id of the job to which the process will belong. - * @param vpid The virtual process id for the name. Note that no check is made for uniqueness - - * the caller is responsible for ensuring that the requested name is, in fact, unique - * by first requesting reservation of an appropriate range of virtual process id's. - * - * @retval *name Pointer to an ompi_process_name_t structure containing the name. - * @retval NULL Indicates an error, probably due to inability to allocate memory for - * the name structure. - * - * @code - * new_name = ompi_name_server.create_process_name(cell, job, vpid); - * @endcode - */ -typedef int (*orte_ns_base_module_create_proc_name_fn_t)(orte_process_name_t **name, - orte_jobid_t job, - orte_vpid_t vpid); - -/* - * Create my name - * If a process is a singleton, then it needs to create a name for itself. When - * a persistent daemon is present, this requires a communication to that daemon. - * Since the RML uses process names as its index into the RML communicator table, - * the RML automatically assigns a name to each process when it first attempts - * to communicate. This function takes advantage of that behavior to ensure that - * one, and ONLY one, name gets assigned to the process - */ -typedef int (*orte_ns_base_module_create_my_name_fn_t)(void); - -/** - * Convert a string representation to a process name. - * The convert_string_to_process_name() function converts a string representation of a process - * name into an Open MPI name structure. 
The string must be of the proper form - i.e., it - * must be in the form "jobid.vpid", where each field is expressed in hexadecimal form. - * - * @param *name_string A character string representation of a process name. - * - * @retval *name Pointer to an orte_process_name_t structure containing the name. - * @retval NULL Indicates an error, probably due to inability to allocate memory for - * the name structure. - * - */ -typedef int (*orte_ns_base_module_convert_string_to_process_name_fn_t)(orte_process_name_t **name, - const char* name_string); - - -/** - * Get the process name as a character string. - * The get_proc_name_string() function returns the entire process name in a - * character string representation. - * - * The memory required for the string is allocated by the function - releasing - * that allocation is the responsibility of the calling program. - * - * @param *name A pointer to the name structure containing the name to be - * "translated" to a string. - * - * @retval *name_string A pointer to the character string representation of the - * full name. - * @retval NULL Indicates an error occurred - either no memory could be allocated - * or the caller provided an incorrect name pointer (e.g., NULL). - * - * @code - * name-string = ompi_name_server.get_proc_name_string(&name) - * @endcode - */ -typedef int (*orte_ns_base_module_get_proc_name_string_fn_t)(char **name_string, - const orte_process_name_t* name); - -/** - * Compare two name values. - * The compare() function checks the value of the fields in the two - * provided names, and returns a value indicating if the first one is less than, greater - * than, or equal to the second. The value of each field is compared in a hierarchical - * fashion, with jobid and vpid in sequence. The bit-mask - * indicates which fields are to be included in the comparison. Fields not included via the - * bit-mask are ignored. 
Thus, the caller may request that any combination of the two fields - * be included in the comparison. - * - * @param fields A bit-mask indicating which fields are to be included in the comparison. The - * comparison is performed on a hierarchical basis, with - * jobid and then vpid. Each field can be included separately, thus allowing the caller - * to configure the comparison to meet their needs. - * @param *name1 A pointer to the first name structure. - * @param *name2 A pointer to the second name structure. - * - * @retval -1 The indicated fields of the first provided name are less than the same - * fields of the second provided name. - * @retval 0 The indicated fields of the two provided names are equal. - * @retval +1 The indicated fields of the first provided name is greater than the same - * fields of the second provided name. - * - */ -typedef int (*orte_ns_base_module_compare_fields_fn_t)(orte_ns_cmp_bitmask_t fields, - const orte_process_name_t* name1, - const orte_process_name_t* name2); - - -/**** VPID FUNCTIONS ****/ -/** - * Get the virtual process id as a character string. - * The get_vpid_string() function returns the vpid in a character string - * representation. The string is created by expressing the field in hexadecimal. Memory - * for the string is allocated by the function - releasing that allocation is the - * responsibility of the calling program. - * - * @param *name A pointer to the name structure containing the name to be - * "translated" to a string. - * - * @retval *name_string A pointer to the character string representation of the - * vpid. - * @retval NULL Indicates an error occurred - either no memory could be allocated - * or the caller provided an incorrect name pointer (e.g., NULL). 
- * - * @code - * vpid-string = ompi_name_server.get_vpid_string(&name) - * @endcode - */ -typedef int (*orte_ns_base_module_get_vpid_string_fn_t)(char **vpid_string, const orte_process_name_t* name); - -/** - * Convert vpid to character string - * Returns the vpid in a character string representation. The string is created - * by expressing the provided vpid in hexadecimal. Memory for the string is - * allocated by the function - releasing that allocation is the responsibility of - * the calling program. - * - * @param vpid The vpid to be converted. - * - * @retval *vpid_string A pointer to a character string representation of the vpid. - * @retval NULL Indicates an error occurred - probably no memory could be allocated. - * - * @code - * vpid-string = ompi_name_server.convert_vpid_to_string(vpid); - * @endcode - */ - typedef int (*orte_ns_base_module_convert_vpid_to_string_fn_t)(char **vpid_string, const orte_vpid_t vpid); - - /** - * Convert a string to a vpid. - * Converts a characters string into a vpid. The character string must be a - * hexadecimal representation of a valid vpid. - * - * @param vpidstring The string to be converted. - * - * @retval vpid The resulting vpid - * @retval MCA_NS_BASE_VPID_MAX String could not be converted. - * - * @code - * vpid = ompi_name_server.convert_string_to_vpid(vpidstring); - * @endcode - */ -typedef int (*orte_ns_base_module_convert_string_to_vpid_fn_t)(orte_vpid_t *vpid, const char* vpidstring); - - - -/**** TAG SERVER ****/ -/* - * Allocate a tag - * If name is NULL, tag server provides next unique tag but cannot look - * that number up again for anyone else. - */ -typedef int (*orte_ns_base_module_assign_rml_tag_fn_t)(orte_rml_tag_t *tag, - char *name); - -/**** DATA TYPE SERVER ****/ -/* This function defines a new data type and gives it a system-wide unique - * identifier for use in the data packing subsystem. Only called from the - * dps when needing a new identifier. 
- */ -typedef int (*orte_ns_base_module_define_data_type_fn_t)( - const char *name, - orte_data_type_t *type); - - -/**** PEER RETRIEVAL ****/ -/** - * Get the process names of all processes in the specified conditions. It is - * sometimes necessary for a process to communicate to all processes of a - * given job, all processes in a given cell or on a given node, etc. The RML - * communication system utilizes the process name as its "pointer" for - * sending messages to another process. This function returns an array of - * process name pointers that contains the names of all processes that - * meet the specified combination of attributes. - * - * @param procs The location where the address of the array of pointers - * is to be stored. The function will dynamically allocate space for the - * array - the caller is responsible for releasing this space. - * @param num_procs The location where the number of entries in the - * returned array is to be stored. - * @param attributes A list of conditions to be used in defining the - * peers to be included in the returned array. This can include a - * request that all peers for the parent job be returned, for example. - * More common options would be to specify a cell or job. - * - * NOTE ORTE_JOBID_WILDCARD - * in the attribute list will cause the function to return the names of *all* - * processes currently active. 
- * - */ -typedef int (*orte_ns_base_module_get_peers_fn_t)(orte_process_name_t **procs, - orte_std_cntr_t *num_procs, - opal_list_t *attributes); - - -/* - * DIAGNOSTIC INTERFACES - */ -typedef int (*orte_ns_base_module_dump_jobs_fn_t)(void); - -typedef int (*orte_ns_base_module_dump_tags_fn_t)(void); - -typedef int (*orte_ns_base_module_dump_datatypes_fn_t)(void); - -typedef int (*orte_ns_base_module_ft_event_fn_t)(int state); - -/* - * Ver 2.0 - */ -struct mca_ns_base_module_2_0_0_t { - /* init */ - orte_ns_base_module_init_fn_t init; - /** node functions */ - orte_ns_base_module_create_nodeids_fn_t create_nodeids; - orte_ns_base_module_get_node_info_fn_t get_node_info; - orte_ns_base_module_convert_nodeid_to_string_fn_t convert_nodeid_to_string; - orte_ns_base_module_convert_string_to_nodeid_fn_t convert_string_to_nodeid; - /* jobid functions */ - orte_ns_base_module_create_jobid_fn_t create_jobid; - orte_ns_base_module_get_job_descendants_fn_t get_job_descendants; - orte_ns_base_module_get_job_children_fn_t get_job_children; - orte_ns_base_module_get_root_job_fn_t get_root_job; - orte_ns_base_module_get_parent_job_fn_t get_parent_job; - orte_ns_base_module_get_job_family_fn_t get_job_family; - orte_ns_base_module_get_jobid_string_fn_t get_jobid_string; - orte_ns_base_module_convert_jobid_to_string_fn_t convert_jobid_to_string; - orte_ns_base_module_convert_string_to_jobid_fn_t convert_string_to_jobid; - orte_ns_base_module_reserve_range_fn_t reserve_range; - orte_ns_base_module_get_vpid_range_fn_t get_vpid_range; - /* vpid functions */ - orte_ns_base_module_get_vpid_string_fn_t get_vpid_string; - orte_ns_base_module_convert_vpid_to_string_fn_t convert_vpid_to_string; - orte_ns_base_module_convert_string_to_vpid_fn_t convert_string_to_vpid; - /* name functions */ - orte_ns_base_module_create_proc_name_fn_t create_process_name; - orte_ns_base_module_create_my_name_fn_t create_my_name; - orte_ns_base_module_convert_string_to_process_name_fn_t 
convert_string_to_process_name; - orte_ns_base_module_get_proc_name_string_fn_t get_proc_name_string; - orte_ns_base_module_compare_fields_fn_t compare_fields; - /* peer functions */ - orte_ns_base_module_get_peers_fn_t get_peers; - /* tag server functions */ - orte_ns_base_module_assign_rml_tag_fn_t assign_rml_tag; - /* data type functions */ - orte_ns_base_module_define_data_type_fn_t define_data_type; - /* diagnostic functions */ - orte_ns_base_module_dump_jobs_fn_t dump_jobs; - orte_ns_base_module_dump_tags_fn_t dump_tags; - orte_ns_base_module_dump_datatypes_fn_t dump_datatypes; - - orte_ns_base_module_ft_event_fn_t ft_event; -}; - -typedef struct mca_ns_base_module_2_0_0_t mca_ns_base_module_2_0_0_t; -typedef mca_ns_base_module_2_0_0_t mca_ns_base_module_t; - -/* - * NS Component - */ -/** - * Initialize the selected component. - */ -typedef mca_ns_base_module_t* (*mca_ns_base_component_init_fn_t)(int *priority); - -/** - * Finalize the selected module - */ -typedef int (*mca_ns_base_component_finalize_fn_t)(void); - - -/* - * the standard component data structure - */ - -struct mca_ns_base_component_2_0_0_t { - mca_base_component_t ns_version; - mca_base_component_data_1_0_0_t ns_data; - - mca_ns_base_component_init_fn_t ns_init; - mca_ns_base_component_finalize_fn_t ns_finalize; -}; -typedef struct mca_ns_base_component_2_0_0_t mca_ns_base_component_2_0_0_t; -typedef mca_ns_base_component_2_0_0_t mca_ns_base_component_t; - - - -/* - * Macro for use in components that are of type ns v2.0.0 - */ -#define MCA_NS_BASE_VERSION_2_0_0 \ - /* ns v2.0 is chained to MCA v1.0 */ \ - MCA_BASE_VERSION_1_0_0, \ - /* ns v2.0 */ \ - "ns", 2, 0, 0 - -/* Global structure for accessing name server functions - */ -ORTE_DECLSPEC extern mca_ns_base_module_t orte_ns; /* holds selected module's function pointers */ - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/ns/ns_types.h b/orte/mca/ns/ns_types.h deleted file mode 100644 index 
47b15b0448..0000000000 --- a/orte/mca/ns/ns_types.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI Name Server - * - * The Open MPI Name Server provides unique name ranges for processes - * within the universe. Each universe will have one name server - * running within the seed daemon. This is done to prevent the - * inadvertent duplication of names. - */ - -#ifndef ORTE_NS_TYPES_H_ -#define ORTE_NS_TYPES_H_ - -/* - * includes - */ - -#include "orte_config.h" -#include "orte/orte_types.h" - -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#include - -#include "opal/types.h" -#include "opal/class/opal_list.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/**** NS ATTRIBUTES ****/ -#define ORTE_NS_USE_PARENT "orte-ns-use-parent" -#define ORTE_NS_USE_ROOT "orte-ns-use-root" -#define ORTE_NS_USE_JOBID "orte-ns-use-job" -#define ORTE_NS_USE_NODE "orte-ns-use-node" -#define ORTE_NS_INCLUDE_DESCENDANTS "orte-ns-include-desc" -#define ORTE_NS_INCLUDE_CHILDREN "orte-ns-include-child" -#define ORTE_NS_USE_JOB_FAMILY "orte-ns-use-job-family" - - -/* - * useful defines for bit-masks - */ - -#define ORTE_NS_CMP_NONE 0x00 -#define ORTE_NS_CMP_JOBID 0x02 -#define ORTE_NS_CMP_VPID 0x04 -#define ORTE_NS_CMP_ALL 0Xff - -#define ORTE_NAME_ARGS(n) \ - (long) ((NULL == n) ? (long)-1 : (long)(n)->jobid), \ - (long) ((NULL == n) ? 
(long)-1 : (long)(n)->vpid) - - /* - * general typedefs & structures - */ -/** Set the allowed range for ids in each space - * - * NOTE: Be sure to update the ORTE_NAME_ARGS #define (above) and all - * uses of it if these types change to be larger than (long)! The - * HTON and NTOH macros below must be updated, as well as the MIN / - * MAX macros below and the datatype packing representations in - * ns_private.h - * - * NOTE: Be sure to keep the jobid and vpid types the same size! Due - * to padding rules, it won't save anything to have one larger than - * the other, and it will cause problems in the communication subsystems - */ - -#if ORTE_ENABLE_JUMBO_APPS - typedef orte_std_cntr_t orte_jobid_t; - #define ORTE_JOBID_MAX ORTE_STD_CNTR_MAX - #define ORTE_JOBID_MIN ORTE_STD_CNTR_MIN - typedef orte_std_cntr_t orte_vpid_t; - #define ORTE_VPID_MAX ORTE_STD_CNTR_MAX - #define ORTE_VPID_MIN ORTE_STD_CNTR_MIN - typedef orte_std_cntr_t orte_nodeid_t; - #define ORTE_NODEID_MAX ORTE_STD_CNTR_MAX - #define ORTE_NODEID_MIN ORTE_STD_CNTR_MIN - - #define ORTE_PROCESS_NAME_HTON(n) \ - do { \ - n.jobid = htonl(n.jobid); \ - n.vpid = htonl(n.vpid); \ - } while (0) - - #define ORTE_PROCESS_NAME_NTOH(n) \ - do { \ - n.jobid = ntohl(n.jobid); \ - n.vpid = ntohl(n.vpid); \ - } while (0) - -#else - typedef int16_t orte_jobid_t; - #define ORTE_JOBID_MAX INT16_MAX - #define ORTE_JOBID_MIN INT16_MIN - typedef int16_t orte_vpid_t; - #define ORTE_VPID_MAX INT16_MAX - #define ORTE_VPID_MIN INT16_MIN - typedef int16_t orte_nodeid_t; - #define ORTE_NODEID_MAX INT16_MAX - #define ORTE_NODEID_MIN INT16_MIN - - #define ORTE_PROCESS_NAME_HTON(n) \ - do { \ - n.jobid = htons(n.jobid); \ - n.vpid = htons(n.vpid); \ - } while (0) - - #define ORTE_PROCESS_NAME_NTOH(n) \ - do { \ - n.jobid = ntohs(n.jobid); \ - n.vpid = ntohs(n.vpid); \ - } while (0) - -#endif - - -typedef uint8_t orte_ns_cmp_bitmask_t; /**< Bit mask for comparing process names */ - -struct orte_process_name_t { - orte_jobid_t jobid; 
/**< Job number */ - orte_vpid_t vpid; /**< Process number */ -}; -typedef struct orte_process_name_t orte_process_name_t; - - -/* useful define to print name args in output messages */ -ORTE_DECLSPEC extern char* orte_ns_base_print_name_args(const orte_process_name_t *name); -#define ORTE_NAME_PRINT(n) \ - orte_ns_base_print_name_args(n) - -/* - * define invalid values - */ -#define ORTE_JOBID_INVALID (ORTE_JOBID_MIN + 1) -#define ORTE_VPID_INVALID (ORTE_VPID_MIN + 1) -#define ORTE_NODEID_INVALID (ORTE_NODEID_MIN + 1) - -/* - * define wildcard values (should be -1) - */ -#define ORTE_JOBID_WILDCARD -1 -#define ORTE_VPID_WILDCARD -1 -#define ORTE_NODEID_WILDCARD -1 - -/* - * Shortcut for some commonly used names - */ - -#define ORTE_NAME_WILDCARD &orte_ns_name_wildcard -ORTE_DECLSPEC extern orte_process_name_t orte_ns_name_wildcard; /** instantiated in orte/mca/ns/base/ns_base_open.c */ - -#define ORTE_NAME_INVALID &orte_ns_name_invalid -ORTE_DECLSPEC extern orte_process_name_t orte_ns_name_invalid; /** instantiated in orte/mca/ns/base/ns_base_open.c */ - -#define ORTE_PROC_MY_NAME orte_process_info.my_name - -#define ORTE_PROC_MY_HNP &orte_ns_name_my_hnp -ORTE_DECLSPEC extern orte_process_name_t orte_ns_name_my_hnp; /** instantiated in orte/mca/ns/base/ns_base_open.c */ - -/** List of names for general use - */ -struct orte_namelist_t { - opal_list_item_t item; /**< Allows this item to be placed on a list */ - orte_process_name_t *name; /**< Name of a process */ -}; -typedef struct orte_namelist_t orte_namelist_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_namelist_t); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/ns/proxy/Makefile.am b/orte/mca/ns/proxy/Makefile.am deleted file mode 100644 index 420c7b1cda..0000000000 --- a/orte/mca/ns/proxy/Makefile.am +++ /dev/null @@ -1,46 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. 
All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - ns_proxy.h \ - ns_proxy_cell_fns.c \ - ns_proxy_diag_fns.c \ - ns_proxy_general_fns.c \ - ns_proxy_job_fns.c \ - ns_proxy_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if OMPI_BUILD_ns_proxy_DSO -component_noinst = -component_install = mca_ns_proxy.la -else -component_noinst = libmca_ns_proxy.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_ns_proxy_la_SOURCES = $(sources) -mca_ns_proxy_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_ns_proxy_la_SOURCES =$(sources) -libmca_ns_proxy_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/ns/proxy/ns_proxy.h b/orte/mca/ns/proxy/ns_proxy.h deleted file mode 100644 index c651e53926..0000000000 --- a/orte/mca/ns/proxy/ns_proxy.h +++ /dev/null @@ -1,147 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef NS_PROXY_H -#define NS_PROXY_H - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/types.h" -#include "opal/class/opal_list.h" - -#include "orte/dss/dss.h" - -#include "orte/mca/ns/ns.h" -#include "orte/mca/ns/base/ns_private.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -struct orte_ns_proxy_tagitem_t { - opal_object_t super; - orte_rml_tag_t tag; /**< OOB tag */ - char *name; /**< Name associated with tag */ -}; -typedef struct orte_ns_proxy_tagitem_t orte_ns_proxy_tagitem_t; - -OBJ_CLASS_DECLARATION(orte_ns_proxy_tagitem_t); - -struct orte_ns_proxy_dti_t { - opal_object_t super; - orte_data_type_t id; /**< data type id */ - char *name; /**< Name associated with data type */ -}; -typedef struct orte_ns_proxy_dti_t orte_ns_proxy_dti_t; - -OBJ_CLASS_DECLARATION(orte_ns_proxy_dti_t); - - -/* - * Module open / close - */ -int orte_ns_proxy_open(void); -int orte_ns_proxy_close(void); - - -/* - * Startup / Shutdown - */ -mca_ns_base_module_t* orte_ns_proxy_init(int *priority); -int orte_ns_proxy_module_init(void); -int orte_ns_proxy_finalize(void); - -/* - * globals used within proxy component - */ -typedef struct { - size_t max_size, block_size; - int debug; - orte_pointer_array_t *tags; - orte_rml_tag_t num_tags; - orte_pointer_array_t *dts; - orte_data_type_t num_dts; - opal_mutex_t mutex; -} orte_ns_proxy_globals_t; - -extern orte_ns_proxy_globals_t orte_ns_proxy; - -/* - * simplifying define - */ -#define ORTE_NS_MY_REPLICA orte_process_info.ns_replica - - -/* - * proxy function prototypes - */ -int orte_ns_proxy_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodenames); - -int orte_ns_proxy_get_node_info(char ***nodename, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); - -int orte_ns_proxy_create_jobid(orte_jobid_t *jobid, opal_list_t *attrs); - -int 
orte_ns_proxy_get_job_descendants(orte_jobid_t** descendants, orte_std_cntr_t *ndesc, orte_jobid_t job); - -int orte_ns_proxy_get_job_children(orte_jobid_t** descendants, orte_std_cntr_t *ndesc, orte_jobid_t job); - -int orte_ns_proxy_get_root_job(orte_jobid_t *root_job, orte_jobid_t job); - -int orte_ns_proxy_get_parent_job(orte_jobid_t *parent, orte_jobid_t job); - -int orte_ns_proxy_get_job_family(orte_jobid_t** family, orte_std_cntr_t *num_members, orte_jobid_t job); - -int orte_ns_proxy_reserve_range(orte_jobid_t job, orte_vpid_t range, - orte_vpid_t *startvpid); - -int orte_ns_proxy_get_vpid_range(orte_jobid_t job, orte_vpid_t *range); - -int orte_ns_proxy_get_peers(orte_process_name_t **procs, - orte_std_cntr_t *num_procs, opal_list_t *attrs); - -int orte_ns_proxy_assign_rml_tag(orte_rml_tag_t *tag, char *name); - -int orte_ns_proxy_define_data_type(const char *name, - orte_data_type_t *type); - -int orte_ns_proxy_create_my_name(void); - -/* - * Diagnostic functions - */ -int orte_ns_proxy_dump_jobs(void); - -int orte_ns_proxy_dump_tags(void); - -int orte_ns_proxy_dump_datatypes(void); - -int orte_ns_proxy_ft_event(int state); - -/* - * - */ -ORTE_MODULE_DECLSPEC extern mca_ns_base_component_t mca_ns_proxy_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif diff --git a/orte/mca/ns/proxy/ns_proxy_cell_fns.c b/orte/mca/ns/proxy/ns_proxy_cell_fns.c deleted file mode 100644 index 83474b8c29..0000000000 --- a/orte/mca/ns/proxy/ns_proxy_cell_fns.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/mca/mca.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "ns_proxy.h" - -/** - * globals - */ - -/* - * functions - */ - -int orte_ns_proxy_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodenames) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count, index; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); - - command = ORTE_NS_CREATE_NODEID_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - count = opal_argv_count(nodenames); - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &count, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, nodenames, count, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return 
ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_NS_CREATE_NODEID_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &index, &count, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /** allocate the space for the nodeids */ - *nodeids = (orte_nodeid_t*)malloc(index * sizeof(orte_nodeid_t)); - if (NULL == *nodeids) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, nodeids, &index, ORTE_NODEID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - OBJ_RELEASE(answer); - - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_SUCCESS; -} - -int orte_ns_proxy_get_node_info(char ***nodenames, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count, index; - int rc, ret=ORTE_SUCCESS; - - OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); - - command = ORTE_NS_GET_NODE_INFO_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &num_nodes, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, nodeids, num_nodes, ORTE_NODEID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - 
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (ORTE_NS_GET_NODE_INFO_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &index, &count, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - /** create the space for the nodenames */ - *nodenames = (char**)malloc(index * sizeof(char*)); - if (NULL == *nodenames) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, *nodenames, &index, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ret, &count, ORTE_INT))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - 
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ret; -} diff --git a/orte/mca/ns/proxy/ns_proxy_component.c b/orte/mca/ns/proxy/ns_proxy_component.c deleted file mode 100644 index 8d5e47c4d9..0000000000 --- a/orte/mca/ns/proxy/ns_proxy_component.c +++ /dev/null @@ -1,324 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open MPI Name Server - * - * The Open MPI Name Server provides unique name ranges for processes - * within the universe. Each universe will have one name server - * running within the seed daemon. This is done to prevent the - * inadvertent duplication of names. 
- */ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/util/proc_info.h" -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/mca_base_param.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" - -#include "ns_proxy.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_ns_base_component_t mca_ns_proxy_component = { - { - MCA_NS_BASE_VERSION_2_0_0, - - "proxy", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_ns_proxy_open, /* module open */ - orte_ns_proxy_close /* module close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - orte_ns_proxy_init, /* module init */ - orte_ns_proxy_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static mca_ns_base_module_t orte_ns_proxy_module = { - /* init */ - orte_ns_proxy_module_init, - /** node functions */ - orte_ns_proxy_create_nodeids, - orte_ns_proxy_get_node_info, - orte_ns_base_convert_nodeid_to_string, - orte_ns_base_convert_string_to_nodeid, - /* jobid functions */ - orte_ns_proxy_create_jobid, - orte_ns_proxy_get_job_descendants, - orte_ns_proxy_get_job_children, - orte_ns_proxy_get_root_job, - orte_ns_proxy_get_parent_job, - orte_ns_proxy_get_job_family, - orte_ns_base_get_jobid_string, - orte_ns_base_convert_jobid_to_string, - orte_ns_base_convert_string_to_jobid, - orte_ns_proxy_reserve_range, - orte_ns_proxy_get_vpid_range, - /* vpid functions */ - orte_ns_base_get_vpid_string, - orte_ns_base_convert_vpid_to_string, - orte_ns_base_convert_string_to_vpid, - /* name functions */ - orte_ns_base_create_process_name, - orte_ns_proxy_create_my_name, - orte_ns_base_convert_string_to_process_name, - 
orte_ns_base_get_proc_name_string, - orte_ns_base_compare_fields, - /* peer functions */ - orte_ns_proxy_get_peers, - /* tag server functions */ - orte_ns_proxy_assign_rml_tag, - /* data type functions */ - orte_ns_proxy_define_data_type, - /* diagnostic functions */ - orte_ns_proxy_dump_jobs, - orte_ns_proxy_dump_tags, - orte_ns_proxy_dump_datatypes, - orte_ns_proxy_ft_event -}; - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* constructor - used to initialize state of taglist instance */ -static void orte_ns_proxy_tagitem_construct(orte_ns_proxy_tagitem_t* tagitem) -{ - tagitem->tag = ORTE_RML_TAG_MAX; - tagitem->name = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_ns_proxy_tagitem_destructor(orte_ns_proxy_tagitem_t* tagitem) -{ - if (NULL != tagitem->name) { - free(tagitem->name); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_ns_proxy_tagitem_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_ns_proxy_tagitem_construct, /* constructor */ - orte_ns_proxy_tagitem_destructor); /* destructor */ - -/* constructor - used to initialize state of dtilist instance */ -static void orte_ns_proxy_dti_construct(orte_ns_proxy_dti_t* dti) -{ - dti->id = ORTE_DSS_ID_MAX; - dti->name = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_ns_proxy_dti_destructor(orte_ns_proxy_dti_t* dti) -{ - if (NULL != dti->name) { - free(dti->name); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE( - orte_ns_proxy_dti_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_ns_proxy_dti_construct, /* constructor */ - orte_ns_proxy_dti_destructor); /* destructor */ - -/* - * globals needed within proxy component - */ - -orte_ns_proxy_globals_t orte_ns_proxy; - - -/* - * Open the proxy component and obtain the name of my proxy. 
- */ -int orte_ns_proxy_open(void) -{ - int id, param; - - id = mca_base_param_register_int("ns", "proxy", "debug", NULL, 0); - mca_base_param_lookup_int(id, &orte_ns_proxy.debug); - - id = mca_base_param_register_int("ns", "proxy", "maxsize", NULL, - ORTE_NS_ARRAY_MAX_SIZE); - mca_base_param_lookup_int(id, ¶m); - orte_ns_proxy.max_size = (size_t)param; - - id = mca_base_param_register_int("ns", "proxy", "blocksize", NULL, - ORTE_NS_ARRAY_BLOCK_SIZE); - mca_base_param_lookup_int(id, ¶m); - orte_ns_proxy.block_size = (size_t)param; - - return ORTE_SUCCESS; -} - -/* - * ditto for this one - */ -int orte_ns_proxy_close(void) -{ - return ORTE_SUCCESS; -} - -mca_ns_base_module_t* orte_ns_proxy_init(int *priority) -{ - orte_process_name_t name; - int ret, rc; - - /* If we are NOT to host a proxy, then we want to be selected, so do all - the setup and return the module */ - /* opal_output(mca_ns_base_output, "ns_proxy: entered init\n"); */ - if (NULL != orte_process_info.ns_replica_uri) { - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other ns components). If - we're not the seed, then we don't want to be selected, so - return NULL. 
*/ - - *priority = 10; - - /* define the proxy for us to use */ - if(ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.ns_replica_uri, &name, NULL))) { - ORTE_ERROR_LOG(ret); - return NULL; - } - if(ORTE_SUCCESS != (ret = orte_dss.copy((void**)&orte_process_info.ns_replica, &name, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - return NULL; - } - - /* initialize the taglist */ - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_ns_proxy.tags), - (orte_std_cntr_t)orte_ns_proxy.block_size, - (orte_std_cntr_t)orte_ns_proxy.max_size, - (orte_std_cntr_t)orte_ns_proxy.block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_ns_proxy.num_tags = 0; - - /* initialize the dtlist */ - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_ns_proxy.dts), - (orte_std_cntr_t)orte_ns_proxy.block_size, - (orte_std_cntr_t)orte_ns_proxy.max_size, - (orte_std_cntr_t)orte_ns_proxy.block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_ns_proxy.num_dts = 0; - - /* setup the thread lock */ - OBJ_CONSTRUCT(&orte_ns_proxy.mutex, opal_mutex_t); - - /* Return the module */ - - initialized = true; - return &orte_ns_proxy_module; - - } else { - return NULL; - } -} - - -/* - * module init function - */ -int orte_ns_proxy_module_init(void) -{ - return ORTE_SUCCESS; -} - - -/* - * finalize routine - */ -int orte_ns_proxy_finalize(void) -{ - orte_ns_proxy_tagitem_t **tag; - orte_ns_proxy_dti_t **dti; - orte_std_cntr_t i; - - /* free all tracking storage, but only if this component was initialized */ - - if (initialized) { - tag = (orte_ns_proxy_tagitem_t**)(orte_ns_proxy.tags)->addr; - for (i=0; i < (orte_ns_proxy.tags)->size; i++) { - if (NULL != tag[i]) OBJ_RELEASE(tag[i]); - } - OBJ_RELEASE(orte_ns_proxy.tags); - - dti = (orte_ns_proxy_dti_t**)(orte_ns_proxy.dts)->addr; - for (i=0; i < (orte_ns_proxy.dts)->size; i++) { - if (NULL != dti[i]) OBJ_RELEASE(dti[i]); - } - OBJ_RELEASE(orte_ns_proxy.dts); - - initialized = false; - } - - /* All done */ - - return 
ORTE_SUCCESS; -} - -int orte_ns_proxy_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/ns/proxy/ns_proxy_diag_fns.c b/orte/mca/ns/proxy/ns_proxy_diag_fns.c deleted file mode 100644 index c1a671ff6e..0000000000 --- a/orte/mca/ns/proxy/ns_proxy_diag_fns.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/ns/base/base.h" -#include "ns_proxy.h" - -/* - * DIAGNOSTIC functions - */ -int orte_ns_proxy_dump_jobs(void) -{ - orte_buffer_t cmd; - orte_buffer_t answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - command = ORTE_NS_DUMP_JOBIDS_CMD; - - OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); - - /* dump name service replica jobid tracker */ - OBJ_CONSTRUCT(&cmd, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - OBJ_DESTRUCT(&cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, &cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_DESTRUCT(&cmd); - - OBJ_CONSTRUCT(&answer, orte_buffer_t); - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(&answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&answer); - return rc; - } - - if (ORTE_NS_DUMP_JOBIDS_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&answer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&answer); - return rc; - } - - return ORTE_SUCCESS; -} - - -int 
orte_ns_proxy_dump_tags(void) -{ - orte_buffer_t cmd; - orte_buffer_t answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t i; - orte_std_cntr_t count; - orte_rml_tag_t j; - orte_ns_proxy_tagitem_t **ptr; - int rc; - - command = ORTE_NS_DUMP_TAGS_CMD; - - OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); - - /* dump name service replica tag tracker */ - OBJ_CONSTRUCT(&cmd, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - OBJ_DESTRUCT(&cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, &cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_DESTRUCT(&cmd); - - OBJ_CONSTRUCT(&answer, orte_buffer_t); - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(&answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&answer); - return rc; - } - - if (ORTE_NS_DUMP_TAGS_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&answer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&answer); - return rc; - } - - /* dump local tag tracker */ - opal_output(mca_ns_base_output, "\n\n%s Dump of Local Tag Tracker\n", - ORTE_NAME_PRINT(orte_process_info.my_name)); - ptr = (orte_ns_proxy_tagitem_t**)(orte_ns_proxy.tags)->addr; - for (i=0, j=0; j < orte_ns_proxy.num_tags && - i < (orte_ns_proxy.tags)->size; i++) { - if (NULL != ptr[i]) { - j++; - opal_output(mca_ns_base_output, "Num: %lu\tTag: %lu\tTag name: %s\n", - (unsigned long)j, (unsigned long)ptr[i]->tag, ptr[i]->name); 
- } - } - - return ORTE_SUCCESS; -} - - -int orte_ns_proxy_dump_datatypes(void) -{ - orte_buffer_t cmd; - orte_buffer_t answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t i, j; - orte_std_cntr_t count; - orte_ns_proxy_dti_t **ptr; - int rc; - - command = ORTE_NS_DUMP_DATATYPES_CMD; - - OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); - - /* dump name service replica datatype tracker */ - OBJ_CONSTRUCT(&cmd, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - OBJ_DESTRUCT(&cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, &cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_DESTRUCT(&cmd); - - OBJ_CONSTRUCT(&answer, orte_buffer_t); - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(&answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&answer); - return rc; - } - - if (ORTE_NS_DUMP_DATATYPES_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&answer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&answer); - return rc; - } - - /* dump local datatype tracker */ - opal_output(mca_ns_base_output, "\n\n%s Dump of Local Datatype Tracker\n", - ORTE_NAME_PRINT(orte_process_info.my_name)); - ptr = (orte_ns_proxy_dti_t**)(orte_ns_proxy.dts)->addr; - for (i=0, j=0; j < orte_ns_proxy.num_dts && - i < (orte_ns_proxy.dts)->size; i++) { - if (NULL != ptr[i]) { - j++; - opal_output(mca_ns_base_output, "Num: %lu\tDatatype id: %lu\tDatatype name: %s\n", - 
(unsigned long)j, (unsigned long)ptr[i]->id, ptr[i]->name); - } - } - - return ORTE_SUCCESS; -} - - diff --git a/orte/mca/ns/proxy/ns_proxy_general_fns.c b/orte/mca/ns/proxy/ns_proxy_general_fns.c deleted file mode 100644 index 752e879cd5..0000000000 --- a/orte/mca/ns/proxy/ns_proxy_general_fns.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/rml/rml.h" - -#include "ns_proxy.h" - -/* - * PEER functions - */ -int orte_ns_proxy_get_peers(orte_process_name_t **procs, - orte_std_cntr_t *num_procs, opal_list_t *attrs) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count, nprocs, i; - orte_attribute_t *attr; - int rc; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); - - /* set default value */ - *procs = NULL; - *num_procs = 0; - - /* check the attributes to see if USE_JOB has been set. 
If not, then this is - * a request for my own job peers - process that one locally - */ - - if (NULL == (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_JOBID))) { - /* get my own job peers */ - *procs = (orte_process_name_t*)malloc(orte_process_info.num_procs * sizeof(orte_process_name_t)); - if (NULL == *procs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - for (i=0; i < orte_process_info.num_procs; i++) { - (*procs)[i].jobid = ORTE_PROC_MY_NAME->jobid; - (*procs)[i].vpid = i; - } - - *num_procs = orte_process_info.num_procs; - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_SUCCESS; - } - - /* non-local request for peers in another job - send to replica for processing */ - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_NS_GET_PEERS_CMD; - /* pack the command */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - /* pack the attributes */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, attrs, 1, ORTE_ATTR_LIST))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - 
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (ORTE_NS_GET_PEERS_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &nprocs, &count, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - /* allocate space for array of proc names */ - if (0 < nprocs) { - *procs = (orte_process_name_t*)malloc((nprocs) * sizeof(orte_process_name_t)); - if (NULL == *procs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, *procs, &nprocs, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - } - *num_procs = nprocs; - - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_SUCCESS; -} - - -int orte_ns_proxy_assign_rml_tag(orte_rml_tag_t *tag, - char *name) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_ns_proxy_tagitem_t* tagitem, **tags; - orte_std_cntr_t count, i; - orte_rml_tag_t j; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); - - if (NULL != name) { - /* see if this name is already in list - if so, return tag */ - tags = (orte_ns_proxy_tagitem_t**)orte_ns_proxy.tags->addr; - for (i=0, j=0; j < orte_ns_proxy.num_tags && - i < (orte_ns_proxy.tags)->size; i++) { - if (NULL != tags[i]) { - j++; - if (tags[i]->name != NULL && - 0 == 
strcmp(name, tags[i]->name)) { /* found name on list */ - *tag = tags[i]->tag; - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_SUCCESS; - } - } - } - } - - /* okay, not on local list - so go get one from tag server */ - command = ORTE_NS_ASSIGN_OOB_TAG_CMD; - *tag = ORTE_RML_TAG_MAX; /* set the default error value */ - - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (NULL == name) { - name = "NULL"; - } - - if (0 > (rc = orte_dss.pack(cmd, &name, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (ORTE_NS_ASSIGN_OOB_TAG_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc 
= orte_dss.unpack(answer, tag, &count, ORTE_RML_TAG))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - OBJ_RELEASE(answer); - - /* add the new tag to the local list so we don't have to get it again */ - tagitem = OBJ_NEW(orte_ns_proxy_tagitem_t); - if (NULL == tagitem) { /* out of memory */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&i, - orte_ns_proxy.tags, tagitem))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - tagitem->tag = *tag; - (orte_ns_proxy.num_tags)++; - if (NULL != name) { /* provided - can look it up later */ - tagitem->name = strdup(name); - } else { - tagitem->name = NULL; - } - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - - /* all done */ - return ORTE_SUCCESS; -} - - -int orte_ns_proxy_define_data_type(const char *name, - orte_data_type_t *type) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_ns_proxy_dti_t **dti, *dtip; - orte_std_cntr_t count, i, j; - int rc=ORTE_SUCCESS; - - if (NULL == name || 0 < *type) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); - - /* first, check to see if name is already on local list - * if so, return id, ensure registered with dss - */ - dti = (orte_ns_proxy_dti_t**)orte_ns_proxy.dts->addr; - for (i=0, j=0; j < orte_ns_proxy.num_dts && - i < orte_ns_proxy.dts->size; i++) { - if (NULL != dti[i]) { - j++; - if (dti[i]->name != NULL && - 0 == strcmp(name, dti[i]->name)) { /* found name on list */ - *type = dti[i]->id; - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_SUCCESS; - } - } - } - - - /* okay, not on local list - so go get one from tag server */ - command = ORTE_NS_DEFINE_DATA_TYPE_CMD; - *type = ORTE_DSS_ID_MAX; /* set the default error value */ - - if ((cmd = 
OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&name, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (ORTE_NS_ASSIGN_OOB_TAG_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, type, &count, ORTE_DATA_TYPE))) { - ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_UNPACK_FAILURE; - } - OBJ_RELEASE(answer); - - /* add the new id to the local list so we don't have to get it again */ - dtip = OBJ_NEW(orte_ns_proxy_dti_t); - if (NULL == dtip) { /* 
out of memory */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - dtip->name = strdup(name); - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&i, - orte_ns_proxy.dts, dtip))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - dtip->id = *type; - (orte_ns_proxy.num_dts)++; - - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - - /* all done */ - return rc; -} - -/* - * Take advantage of the way the RML uses the process name as its index into - * the RML communicator table. Because the RML needs a name right away, it will - * automatically assign us one when it receives a message - and it communicates - * that assignment back to us automatically. Thus, to get a name for ourselves, - * all we have to do is send a message! No response from the replica is required. - */ -int orte_ns_proxy_create_my_name(void) -{ - orte_process_name_t new_name; - int ret; - - ret = orte_rml.get_new_name(&new_name); - if (ORTE_SUCCESS == ret) { - memcpy(ORTE_PROC_MY_NAME, &new_name, sizeof(orte_process_name_t)); - } - - return ret; -} diff --git a/orte/mca/ns/proxy/ns_proxy_job_fns.c b/orte/mca/ns/proxy/ns_proxy_job_fns.c deleted file mode 100644 index 54caee8103..0000000000 --- a/orte/mca/ns/proxy/ns_proxy_job_fns.c +++ /dev/null @@ -1,690 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -#include "orte_config.h" - -#include - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "ns_proxy.h" - -/**** CREATE JOBID ****/ -int orte_ns_proxy_create_jobid(orte_jobid_t *job, opal_list_t *attrs) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - /* set default value */ - *job = ORTE_JOBID_INVALID; - - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_NS_CREATE_JOBID_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, attrs, 1, ORTE_ATTR_LIST))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_NS_CREATE_JOBID_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return 
ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, job, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - - -/**** GET JOB DESCENDANTS ****/ -int orte_ns_proxy_get_job_descendants(orte_jobid_t **descendants, orte_std_cntr_t *num_desc, orte_jobid_t job) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count, ndesc=0; - orte_jobid_t *jobs=NULL; - int rc; - - OPAL_TRACE(1); - - /* set default response */ - *descendants = NULL; - *num_desc = 0; - - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_NS_GET_JOB_DESC_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&job, 1, ORTE_JOBID))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_NS_GET_JOB_DESC_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if 
(ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ndesc, &count, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* if there are any descendants, allocate space for them and unpack */ - if (0 < ndesc) { - jobs = (orte_jobid_t*)malloc(ndesc * sizeof(orte_jobid_t)); - if (NULL == jobs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - return ORTE_ERR_OUT_OF_RESOURCE; - } - count = ndesc; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, jobs, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - } - - OBJ_RELEASE(answer); - - *descendants = jobs; - *num_desc = count; - - return ORTE_SUCCESS; -} - -/**** GET JOB CHILDREN ****/ -int orte_ns_proxy_get_job_children(orte_jobid_t **descendants, orte_std_cntr_t *num_desc, orte_jobid_t job) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count, ndesc=0; - orte_jobid_t *jobs=NULL; - int rc; - - OPAL_TRACE(1); - - /* set default response */ - *descendants = NULL; - *num_desc = 0; - - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_NS_GET_JOB_CHILD_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&job, 1, ORTE_JOBID))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, 
ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_NS_GET_JOB_DESC_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ndesc, &count, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* if there are any descendants, allocate space for them and unpack */ - if (0 < ndesc) { - jobs = (orte_jobid_t*)malloc(ndesc * sizeof(orte_jobid_t)); - if (NULL == jobs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - return ORTE_ERR_OUT_OF_RESOURCE; - } - count = ndesc; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, jobs, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - } - - OBJ_RELEASE(answer); - - *descendants = jobs; - *num_desc = count; - - return ORTE_SUCCESS; -} - -int orte_ns_proxy_get_root_job(orte_jobid_t *root_job, orte_jobid_t job) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - /* set default value */ - *root_job = ORTE_JOBID_INVALID; - - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_NS_GET_ROOT_JOB_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - 
OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_NS_GET_ROOT_JOB_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, root_job, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - -int orte_ns_proxy_get_parent_job(orte_jobid_t *parent, orte_jobid_t job) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - /* set default value */ - *parent = ORTE_JOBID_INVALID; - - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_NS_GET_PARENT_JOB_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - return 
ORTE_ERR_OUT_OF_RESOURCE; - } - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_NS_GET_PARENT_JOB_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, parent, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - - -int orte_ns_proxy_get_job_family(orte_jobid_t** family, orte_std_cntr_t *num_members, orte_jobid_t job) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count, ndesc=0; - orte_jobid_t *jobs=NULL; - int rc; - - OPAL_TRACE(1); - - /* set default response */ - *family = NULL; - *num_members = 0; - - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_NS_GET_JOB_FAMILY_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&job, 1, ORTE_JOBID))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, 
ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_NS_GET_JOB_FAMILY_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ndesc, &count, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - /* if there are any members, allocate space for them and unpack */ - if (0 < ndesc) { - jobs = (orte_jobid_t*)malloc(ndesc * sizeof(orte_jobid_t)); - if (NULL == jobs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(answer); - return ORTE_ERR_OUT_OF_RESOURCE; - } - count = ndesc; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, jobs, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - } - - OBJ_RELEASE(answer); - - *family = jobs; - *num_members = count; - - return ORTE_SUCCESS; -} - - -int orte_ns_proxy_reserve_range(orte_jobid_t job, orte_vpid_t range, orte_vpid_t *starting_vpid) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - /* set default return value */ - *starting_vpid = ORTE_VPID_INVALID; - - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_NS_RESERVE_RANGE_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&job, 1, ORTE_JOBID))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, 
(void*)&range, 1, ORTE_VPID))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if ((ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) - || (ORTE_NS_RESERVE_RANGE_CMD != command)) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, starting_vpid, &count, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - -int orte_ns_proxy_get_vpid_range(orte_jobid_t job, orte_vpid_t *range) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - /* set default return value */ - *range = ORTE_VPID_INVALID; - - if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - command = ORTE_NS_GET_VPID_RANGE_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&job, 1, ORTE_JOBID))) { /* got a problem */ - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return 
ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - - if ((answer = OBJ_NEW(orte_buffer_t)) == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if ((ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) - || (ORTE_NS_GET_VPID_RANGE_CMD != command)) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, range, &count, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - OBJ_RELEASE(answer); - return ORTE_SUCCESS; -} - diff --git a/orte/mca/ns/replica/Makefile.am b/orte/mca/ns/replica/Makefile.am deleted file mode 100644 index e6f101f64f..0000000000 --- a/orte/mca/ns/replica/Makefile.am +++ /dev/null @@ -1,51 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_pkgdata_DATA = help-ns-replica.txt - -sources = \ - ns_replica.h \ - ns_replica_class_instances.h \ - ns_replica_cell_fns.c \ - ns_replica_diag_fns.c \ - ns_replica_general_fns.c \ - ns_replica_job_fns.c \ - ns_replica_recv.c \ - ns_replica_support_fns.c \ - ns_replica_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if OMPI_BUILD_ns_replica_DSO -component_noinst = -component_install = mca_ns_replica.la -else -component_noinst = libmca_ns_replica.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_ns_replica_la_SOURCES = $(sources) -mca_ns_replica_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_ns_replica_la_SOURCES =$(sources) -libmca_ns_replica_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/ns/replica/configure.params b/orte/mca/ns/replica/configure.params deleted file mode 100644 index 3513f8d956..0000000000 --- a/orte/mca/ns/replica/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/ns/replica/help-ns-replica.txt b/orte/mca/ns/replica/help-ns-replica.txt deleted file mode 100644 index 230bfc3bfa..0000000000 --- a/orte/mca/ns/replica/help-ns-replica.txt +++ /dev/null @@ -1,33 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2006 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -[out-of-jobids] -The system has exhausted its available jobids - the application is attempting -to spawn too many jobs and will be aborted. - -This may be resolved by increasing the number of available jobids by -re-configuring Open MPI with the --enable-jumbo-dynamics option, and then -re-running the application -# -[out-of-vpids] -The system has exhausted its available ranks - the application is attempting -to spawn too many processes and will be aborted. - -This may be resolved by increasing the number of available ranks by -re-configuring Open MPI with the --enable-jumbo-apps option, and then -re-running the application diff --git a/orte/mca/ns/replica/ns_replica.h b/orte/mca/ns/replica/ns_replica.h deleted file mode 100644 index 984a5ca011..0000000000 --- a/orte/mca/ns/replica/ns_replica.h +++ /dev/null @@ -1,224 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef NS_REPLICA_H -#define NS_REPLICA_H - -#include "orte_config.h" -#include "orte/orte_types.h" -#include "orte/orte_constants.h" -#include "opal/threads/mutex.h" -#include "opal/class/opal_object.h" -#include "orte/class/orte_pointer_array.h" -#include "orte/dss/dss.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/ns/base/base.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * globals - */ -#define NS_REPLICA_MAX_STRING_SIZE 256 - -/* - * object for tracking vpids and jobids for job families - * This structure is used to track the parent-child relationship between - * jobs. The "root" of the family is the initial parent - each child has - * a record under that parent. Any child that subsequently spawns its own - * children will form a list of jobids beneath them. - * - * each object records the jobid of the job it represents, and the next vpid - * that will be assigned when a range is requested. 
- */ -typedef struct { - opal_list_item_t super; - orte_jobid_t jobid; - orte_vpid_t next_vpid; - opal_list_t children; -} orte_ns_replica_jobitem_t; -OBJ_CLASS_DECLARATION(orte_ns_replica_jobitem_t); - - -struct orte_ns_replica_tagitem_t { - opal_object_t super; - orte_rml_tag_t tag; /**< OOB tag */ - char *name; /**< Name associated with tag */ -}; -typedef struct orte_ns_replica_tagitem_t orte_ns_replica_tagitem_t; - -OBJ_CLASS_DECLARATION(orte_ns_replica_tagitem_t); - -struct orte_ns_replica_dti_t { - opal_object_t super; - orte_data_type_t id; /**< data type id */ - char *name; /**< Name associated with data type */ -}; -typedef struct orte_ns_replica_dti_t orte_ns_replica_dti_t; - -OBJ_CLASS_DECLARATION(orte_ns_replica_dti_t); - -/* - * globals needed within component - */ -typedef struct { - size_t max_size, block_size; - orte_nodeid_t next_nodeid; - orte_pointer_array_t *nodenames; - orte_jobid_t num_jobids; - opal_list_t jobs; - orte_pointer_array_t *tags; - orte_rml_tag_t num_tags; - orte_pointer_array_t *dts; - orte_data_type_t num_dts; - int debug; - bool isolate; - opal_mutex_t mutex; -} orte_ns_replica_globals_t; - -extern orte_ns_replica_globals_t orte_ns_replica; - -/* - * Module open / close - */ -int orte_ns_replica_open(void); -int orte_ns_replica_close(void); - - -/* - * Startup / Shutdown - */ -mca_ns_base_module_t* orte_ns_replica_init(int *priority); -int orte_ns_replica_module_init(void); -int orte_ns_replica_finalize(void); - -/* - * oob interface - */ - -void orte_ns_replica_recv(int status, orte_process_name_t* sender, - orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); - -/* - * NODE FUNCTIONS - */ -int orte_ns_replica_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodenames); - -int orte_ns_replica_get_node_info(char ***nodenames, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); - -/* - * JOB FUNCTIONS - */ -int orte_ns_replica_create_jobid(orte_jobid_t *jobid, opal_list_t *attrs); - -int 
orte_ns_replica_get_job_descendants(orte_jobid_t **descendants, orte_std_cntr_t *num_desc, orte_jobid_t job); - -int orte_ns_replica_get_job_children(orte_jobid_t **descendants, orte_std_cntr_t *num_desc, orte_jobid_t job); - -int orte_ns_replica_get_root_job(orte_jobid_t *root_job, orte_jobid_t job); - -int orte_ns_replica_get_parent_job(orte_jobid_t *parent, orte_jobid_t job); - -int orte_ns_replica_get_job_family(orte_jobid_t **family, orte_std_cntr_t *num_members, orte_jobid_t job); - -int orte_ns_replica_reserve_range(orte_jobid_t job, - orte_vpid_t range, - orte_vpid_t *startvpid); - -int orte_ns_replica_get_vpid_range(orte_jobid_t job, orte_vpid_t *range); - -/* - * GENERAL FUNCTIONS - */ -int orte_ns_replica_get_peers(orte_process_name_t **procs, - orte_std_cntr_t *num_procs, opal_list_t *attrs); - -int orte_ns_replica_assign_rml_tag(orte_rml_tag_t *tag, - char *name); - - -int orte_ns_replica_define_data_type(const char *name, - orte_data_type_t *type); - -int orte_ns_replica_create_my_name(void); - - -/* - * DIAGNOSTIC FUNCTIONS - */ -int orte_ns_replica_dump_jobs(void); -int orte_ns_replica_dump_jobs_fn(orte_buffer_t *buffer); - -int orte_ns_replica_dump_tags(void); -int orte_ns_replica_dump_tags_fn(orte_buffer_t *buffer); - -int orte_ns_replica_dump_datatypes(void); -int orte_ns_replica_dump_datatypes_fn(orte_buffer_t *buffer); - -int orte_ns_replica_ft_event(int state); - -/* - * INTERNAL SUPPORT FUNCTIONS - */ - -/* find a job's record, wherever it may be located on the list of job families. - * this function searches the entire list of job families, traversing the list - * of all jobs in each family, until it finds the specified job. It then returns - * a pointer to the that job's info structure. It returns - * NULL (without error_logging an error) if no record is found - */ -orte_ns_replica_jobitem_t* orte_ns_replica_find_job(orte_jobid_t job); - -/* find the root job for the specified job. 
- * this function searches the entire list of job families, traversing the list - * of all jobs in each family, until it finds the specified job. It then returns - * a pointer to the root job's info structure for that job family. It returns - * NULL (without error_logging an error) if no record is found - */ -orte_ns_replica_jobitem_t* orte_ns_replica_find_root_job(orte_jobid_t job); - -/* find a job's record on a specified root's family tree. - * this function finds the family record for the specified root job. It then - * traverses the children of that root until it finds the specified job, and then - * returns a pointer to that job's info structure. If root=jobid, then it will - * return a pointer to the root job's info structure. It returns - * NULL (without error_logging an error) if no record is found - */ -orte_ns_replica_jobitem_t* orte_ns_replica_search_job_family_tree(orte_jobid_t root, orte_jobid_t jobid); - -/* given a job's record, create a flattened list of descendants below it */ -void orte_ns_replica_construct_flattened_tree(opal_list_t *tree, orte_ns_replica_jobitem_t *ptr); - -/* search down a tree, following all the children's branches, to find the specified - * job. 
Return a pointer to that object, and a pointer to the parent object - * This function is called recursively, so it passes into it the ptr to the - * current object being looked at - */ -orte_ns_replica_jobitem_t *down_search(orte_ns_replica_jobitem_t *ptr, - orte_ns_replica_jobitem_t **parent_ptr, - orte_jobid_t job); - -ORTE_MODULE_DECLSPEC extern mca_ns_base_component_t mca_ns_replica_component; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/mca/ns/replica/ns_replica_cell_fns.c b/orte/mca/ns/replica/ns_replica_cell_fns.c deleted file mode 100644 index dd6324cf04..0000000000 --- a/orte/mca/ns/replica/ns_replica_cell_fns.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ -#include "orte_config.h" - -#include -#include - -#include "opal/threads/mutex.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/base.h" -#include "ns_replica.h" - -/* - * functions - */ - -/* - * NODEID - */ -int orte_ns_replica_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodenames) -{ - orte_nodeid_t *nds, nid, m; - orte_std_cntr_t k, n, num_nodes; - char **nodes; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - num_nodes = opal_argv_count(nodenames); - if (0 == num_nodes) { /** no nodenames provided - just return */ - *nodeids = NULL; - *nnodes = 0; - return ORTE_SUCCESS; - } - - nds = (orte_nodeid_t*)malloc(num_nodes * sizeof(orte_nodeid_t)); - if (NULL == nds) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - nodes = (char**)(orte_ns_replica.nodenames->addr); - for (n=0; n < num_nodes; n++) { - for (k=0, m=0; m < orte_ns_replica.next_nodeid && - k < (orte_ns_replica.nodenames)->size; k++) { - if (NULL != nodes[k]) { - m++; - if (strcmp(nodenames[n], nodes[k]) == 0) { /** found same name */ - nid = m; - goto ASSIGN; - } - } - } - /** get here if we don't find this nodename - add it */ - nid = orte_ns_replica.next_nodeid++; - -ASSIGN: - nds[n] = nid; - } /** for n */ - - *nodeids = nds; - *nnodes = num_nodes; - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - -int orte_ns_replica_get_node_info(char ***nodenames, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids) -{ - char **names; - orte_std_cntr_t n; - char **nodes; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - if (0 == num_nodes) { - *nodenames = NULL; - return ORTE_SUCCESS; - } - - /** allocate an extra space for the NULL termination */ - names = (char**)malloc((num_nodes+1) * 
sizeof(char*)); - if (NULL == names) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - names[num_nodes] = NULL; /** NULL-terminate the list */ - - nodes = (char**)(orte_ns_replica.nodenames->addr); - for (n=0; n < num_nodes; n++) { - if (nodeids[n] >= orte_ns_replica.next_nodeid) { - names[n] = strdup("invalid nodeid"); - } else if (NULL != nodes[nodeids[n]]) { - names[n] = strdup(nodes[nodeids[n]]); - } else { - names[n] = strdup("unknown nodeid"); - } - } - - *nodenames = names; - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - diff --git a/orte/mca/ns/replica/ns_replica_class_instances.h b/orte/mca/ns/replica/ns_replica_class_instances.h deleted file mode 100644 index 467925c4f5..0000000000 --- a/orte/mca/ns/replica/ns_replica_class_instances.h +++ /dev/null @@ -1,111 +0,0 @@ -/* -*- C -*- -* -* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -* University Research and Technology -* Corporation. All rights reserved. -* Copyright (c) 2004-2006 The University of Tennessee and The University -* of Tennessee Research Foundation. All rights -* reserved. -* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -* University of Stuttgart. All rights reserved. -* Copyright (c) 2004-2005 The Regents of the University of California. -* All rights reserved. 
-* $COPYRIGHT$ -* -* Additional copyrights may follow -* -* $HEADER$ -* -*/ -#ifndef NS_REPLICA_CLASS_INSTANCES_H -#define NS_REPLICA_CLASS_INSTANCES_H - -#include "orte_config.h" -#include "orte/orte_types.h" -#include "orte/orte_constants.h" -#include "opal/threads/mutex.h" -#include "opal/class/opal_object.h" -#include "orte/class/orte_pointer_array.h" -#include "orte/dss/dss.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/ns/base/base.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/*** JOBITEM ***/ -/* constructor - used to initialize state of jobitem instance */ -static void orte_ns_replica_jobitem_construct(orte_ns_replica_jobitem_t *ptr) -{ - ptr->jobid = ORTE_JOBID_INVALID; - ptr->next_vpid = 0; - OBJ_CONSTRUCT(&ptr->children, opal_list_t); -} - -/* destructor - used to free any resources held by instance */ -static void orte_ns_replica_jobitem_destructor(orte_ns_replica_jobitem_t *ptr){ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first(&ptr->children))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&ptr->children); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE(orte_ns_replica_jobitem_t, /* type name */ - opal_list_item_t, /* parent "class" name */ - orte_ns_replica_jobitem_construct, /* constructor */ - orte_ns_replica_jobitem_destructor); /* destructor */ - - -/*** RML TAG ***/ -/* constructor - used to initialize state of taglist instance */ -static void orte_ns_replica_tagitem_construct(orte_ns_replica_tagitem_t* tagitem) -{ - tagitem->tag = ORTE_RML_TAG_MAX; - tagitem->name = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_ns_replica_tagitem_destructor(orte_ns_replica_tagitem_t* tagitem) -{ - if (NULL != tagitem->name) { - free(tagitem->name); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE(orte_ns_replica_tagitem_t, /* type name */ - opal_object_t, /* parent "class" name */ - 
orte_ns_replica_tagitem_construct, /* constructor */ - orte_ns_replica_tagitem_destructor); /* destructor */ - - -/*** DATA TYPE ***/ -/* constructor - used to initialize state of dtilist instance */ -static void orte_ns_replica_dti_construct(orte_ns_replica_dti_t* dti) -{ - dti->id = ORTE_DSS_ID_MAX; - dti->name = NULL; -} - -/* destructor - used to free any resources held by instance */ -static void orte_ns_replica_dti_destructor(orte_ns_replica_dti_t* dti) -{ - if (NULL != dti->name) { - free(dti->name); - } -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE(orte_ns_replica_dti_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_ns_replica_dti_construct, /* constructor */ - orte_ns_replica_dti_destructor); /* destructor */ - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/mca/ns/replica/ns_replica_component.c b/orte/mca/ns/replica/ns_replica_component.c deleted file mode 100644 index fd02073ea8..0000000000 --- a/orte/mca/ns/replica/ns_replica_component.c +++ /dev/null @@ -1,328 +0,0 @@ -/* -*- C -*- -* -* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -* University Research and Technology -* Corporation. All rights reserved. -* Copyright (c) 2004-2005 The University of Tennessee and The University -* of Tennessee Research Foundation. All rights -* reserved. -* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -* University of Stuttgart. All rights reserved. -* Copyright (c) 2004-2005 The Regents of the University of California. -* All rights reserved. -* $COPYRIGHT$ -* -* Additional copyrights may follow -* -* $HEADER$ -*/ -/** @file: -* -* The Open MPI Name Server -* -* The Open MPI Name Server provides unique name ranges for processes -* within the universe. Each universe will have one name server -* running within the seed daemon. This is done to prevent the -* inadvertent duplication of names. 
-*/ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/threads/mutex.h" -#include "opal/class/opal_list.h" -#include "opal/util/output.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/ns/base/ns_private.h" -#include "ns_replica.h" - - -/* - * Struct of function pointers that need to be initialized - */ -mca_ns_base_component_t mca_ns_replica_component = { -{ - MCA_NS_BASE_VERSION_2_0_0, - - "replica", /* MCA module name */ - ORTE_MAJOR_VERSION, /* MCA module major version */ - ORTE_MINOR_VERSION, /* MCA module minor version */ - ORTE_RELEASE_VERSION, /* MCA module release version */ - orte_ns_replica_open, /* module open */ - orte_ns_replica_close /* module close */ -}, -{ - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT -}, -orte_ns_replica_init, /* module init */ -orte_ns_replica_finalize /* module shutdown */ -}; - -/* - * setup the function pointers for the module - */ -static mca_ns_base_module_t orte_ns_replica_module = { - /* init */ - orte_ns_replica_module_init, - /** node functions */ - orte_ns_replica_create_nodeids, - orte_ns_replica_get_node_info, - orte_ns_base_convert_nodeid_to_string, - orte_ns_base_convert_string_to_nodeid, - /* jobid functions */ - orte_ns_replica_create_jobid, - orte_ns_replica_get_job_descendants, - orte_ns_replica_get_job_children, - orte_ns_replica_get_root_job, - orte_ns_replica_get_parent_job, - orte_ns_replica_get_job_family, - orte_ns_base_get_jobid_string, - orte_ns_base_convert_jobid_to_string, - orte_ns_base_convert_string_to_jobid, - orte_ns_replica_reserve_range, - orte_ns_replica_get_vpid_range, - /* vpid functions */ - orte_ns_base_get_vpid_string, - orte_ns_base_convert_vpid_to_string, - orte_ns_base_convert_string_to_vpid, - /* name functions */ - 
orte_ns_base_create_process_name, - orte_ns_replica_create_my_name, - orte_ns_base_convert_string_to_process_name, - orte_ns_base_get_proc_name_string, - orte_ns_base_compare_fields, - /* peer functions */ - orte_ns_replica_get_peers, - /* tag server functions */ - orte_ns_replica_assign_rml_tag, - /* data type functions */ - orte_ns_replica_define_data_type, - /* diagnostic functions */ - orte_ns_replica_dump_jobs, - orte_ns_replica_dump_tags, - orte_ns_replica_dump_datatypes, - orte_ns_replica_ft_event -}; - -/* - * Whether or not we allowed this component to be selected - */ -static bool initialized = false; - -/* - * class instantiations - */ -#include "ns_replica_class_instances.h" - -/* - * globals needed within replica component - */ -orte_ns_replica_globals_t orte_ns_replica; - -/* - * don't really need this function - could just put NULL in the above structure - * Just holding the place in case we decide there is something we need to do - */ -int orte_ns_replica_open(void) -{ - int id, param; - - id = mca_base_param_register_int("ns", "replica", "debug", NULL, (int)false); - mca_base_param_lookup_int(id, &orte_ns_replica.debug); - - id = mca_base_param_register_int("ns", "replica", "isolate", NULL, (int)false); - mca_base_param_lookup_int(id, ¶m); - if (param) { - orte_ns_replica.isolate = true; - } else { - orte_ns_replica.isolate = false; - } - - id = mca_base_param_register_int("ns", "replica", "maxsize", NULL, - ORTE_NS_ARRAY_MAX_SIZE); - mca_base_param_lookup_int(id, ¶m); - orte_ns_replica.max_size = (size_t)param; - - id = mca_base_param_register_int("ns", "replica", "blocksize", NULL, - ORTE_NS_ARRAY_BLOCK_SIZE); - mca_base_param_lookup_int(id, ¶m); - orte_ns_replica.block_size = (size_t)param; - - return ORTE_SUCCESS; -} - -/* - * ditto for this one - */ -int orte_ns_replica_close(void) -{ - return ORTE_SUCCESS; -} - -mca_ns_base_module_t* orte_ns_replica_init(int *priority) -{ - int rc; - - /* If we are to host a replica, then we want to be 
selected, so do all the - setup and return the module */ - - if (NULL == orte_process_info.ns_replica_uri) { - - /* Return a module (choose an arbitrary, positive priority -- - it's only relevant compared to other ns components). If - we're not the seed, then we don't want to be selected, so - return NULL. */ - - *priority = 50; - - /* initialize the node tracker */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_ns_replica.nodenames), - (orte_std_cntr_t)orte_ns_replica.block_size, - (orte_std_cntr_t)orte_ns_replica.max_size, - (orte_std_cntr_t)orte_ns_replica.block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_ns_replica.next_nodeid = 0; - - /* initialize the job tracking system */ - OBJ_CONSTRUCT(&orte_ns_replica.jobs, opal_list_t); - orte_ns_replica.num_jobids = 0; - - /* initialize the taglist */ - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_ns_replica.tags), - (orte_std_cntr_t)orte_ns_replica.block_size, - (orte_std_cntr_t)orte_ns_replica.max_size, - (orte_std_cntr_t)orte_ns_replica.block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_ns_replica.num_tags = 0; - - /* initialize the dtlist */ - - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_ns_replica.dts), - (orte_std_cntr_t)orte_ns_replica.block_size, - (orte_std_cntr_t)orte_ns_replica.max_size, - (orte_std_cntr_t)orte_ns_replica.block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_ns_replica.num_dts = 0; - - /* setup the thread lock */ - OBJ_CONSTRUCT(&orte_ns_replica.mutex, opal_mutex_t); - - /* Return the module */ - - initialized = true; - return &orte_ns_replica_module; - } else { - return NULL; - } -} - -int orte_ns_replica_module_init(void) -{ - int rc; - if (orte_ns_replica.isolate) { - return ORTE_SUCCESS; - } - - /* issue non-blocking receive for call_back function */ - rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NS, ORTE_RML_PERSISTENT, orte_ns_replica_recv, NULL); - if(rc < 0) { - ORTE_ERROR_LOG(rc); - return 
rc; - } - return ORTE_SUCCESS; -} - - -/* - * finalize routine - */ -int orte_ns_replica_finalize(void) -{ - char **cptr; - opal_list_item_t *item; - orte_ns_replica_tagitem_t **tag; - orte_ns_replica_dti_t **dti; - orte_std_cntr_t i; - orte_nodeid_t j; - - /* free all tracking storage, but only if this component was initialized */ - - if (initialized) { - cptr = (char**)(orte_ns_replica.nodenames)->addr; - for (i=0, j=0; j < orte_ns_replica.next_nodeid && - i < (orte_ns_replica.nodenames)->size; i++) { - if (NULL != cptr[i]) { - j++; - free(cptr[i]); - } - } - OBJ_RELEASE(orte_ns_replica.nodenames); - - while (NULL != (item = opal_list_remove_first(&orte_ns_replica.jobs))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&orte_ns_replica.jobs); - - tag = (orte_ns_replica_tagitem_t**)(orte_ns_replica.tags)->addr; - for (i=0; i < (orte_ns_replica.tags)->size; i++) { - if (NULL != tag[i]) OBJ_RELEASE(tag[i]); - } - OBJ_RELEASE(orte_ns_replica.tags); - - dti = (orte_ns_replica_dti_t**)(orte_ns_replica.dts)->addr; - for (i=0; i < (orte_ns_replica.dts)->size; i++) { - if (NULL != dti[i]) OBJ_RELEASE(dti[i]); - } - OBJ_RELEASE(orte_ns_replica.dts); - - initialized = false; - } - - /* All done */ - if (orte_ns_replica.isolate) { - return ORTE_SUCCESS; - } - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NS); - return ORTE_SUCCESS; -} - -int orte_ns_replica_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/ns/replica/ns_replica_diag_fns.c b/orte/mca/ns/replica/ns_replica_diag_fns.c deleted file mode 100644 index b6f9bdd9af..0000000000 --- a/orte/mca/ns/replica/ns_replica_diag_fns.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ -#include "orte_config.h" - -#include -#include - -#include "opal/threads/mutex.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/base.h" -#include "orte/mca/ns/base/ns_private.h" -#include "ns_replica.h" - -/* - * DIAGNOSTIC functions - */ -int orte_ns_replica_dump_jobs(void) -{ - orte_buffer_t buffer; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_jobs_fn(&buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buffer); - return rc; - } - - OBJ_DESTRUCT(&buffer); - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - -static int dump_child_jobs(orte_ns_replica_jobitem_t *ptr, char *prefix, orte_buffer_t *buffer) -{ - opal_list_item_t *item; - orte_ns_replica_jobitem_t *child; - char *tmp; - int rc; - char *pfx; - - asprintf(&pfx, "%s ", prefix); - - /* print out the children's info */ - for (item = opal_list_get_first(&ptr->children); - item != opal_list_get_end(&ptr->children); - item = opal_list_get_next(item)) { - child = (orte_ns_replica_jobitem_t*)item; - asprintf(&tmp, "%sChild jobid: %ld Next vpid: %ld Num direct children: %ld\n", - pfx, (long)child->jobid, (long)child->next_vpid, (long)opal_list_get_size(&child->children)); - if (ORTE_SUCCESS != (rc = 
orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - free(tmp); - if (ORTE_SUCCESS != (rc = dump_child_jobs(child, pfx, buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - free(pfx); - - return ORTE_SUCCESS; -} - -int orte_ns_replica_dump_jobs_fn(orte_buffer_t *buffer) -{ - orte_ns_replica_jobitem_t *root; - opal_list_item_t *item; - char *tmp; - int rc; - char *prefix = " "; - - asprintf(&tmp, "Dump of Name Service Jobid Tracker\n"); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - free(tmp); - - for (item = opal_list_get_first(&orte_ns_replica.jobs); - item != opal_list_get_end(&orte_ns_replica.jobs); - item = opal_list_get_next(item)) { - root = (orte_ns_replica_jobitem_t*)item; - asprintf(&tmp, " Data for job family with root %ld\n", (long)root->jobid); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - free(tmp); - asprintf(&tmp, "%sNext vpid: %ld Num direct children: %ld\n", - prefix, (long)root->next_vpid, (long)opal_list_get_size(&root->children)); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - free(tmp); - if (ORTE_SUCCESS != (rc = dump_child_jobs(root, prefix, buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - return ORTE_SUCCESS; -} - - -int orte_ns_replica_dump_tags(void) -{ - orte_buffer_t buffer; - int rc; - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_tags_fn(&buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buffer); - return rc; - } - - OBJ_DESTRUCT(&buffer); - return ORTE_SUCCESS; -} - - -int orte_ns_replica_dump_tags_fn(orte_buffer_t *buffer) -{ - orte_std_cntr_t i; - orte_rml_tag_t j; - orte_ns_replica_tagitem_t **ptr; - char 
tmp_out[NS_REPLICA_MAX_STRING_SIZE], *tmp; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - tmp = tmp_out; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Dump of Name Service RML Tag Tracker\n"); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - ptr = (orte_ns_replica_tagitem_t**)(orte_ns_replica.tags)->addr; - for (i=0, j=0; j < orte_ns_replica.num_tags && - i < (orte_ns_replica.tags)->size; i++) { - if (NULL != ptr[i]) { - j++; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Num: %lu\tTag id: %lu\tName: %s\n", - (unsigned long)j, (unsigned long)ptr[i]->tag, ptr[i]->name); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - } - } - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - - return ORTE_SUCCESS; -} - - -int orte_ns_replica_dump_datatypes(void) -{ - orte_buffer_t buffer; - int rc; - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_datatypes_fn(&buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buffer); - return rc; - } - - OBJ_DESTRUCT(&buffer); - return ORTE_SUCCESS; -} - -int orte_ns_replica_dump_datatypes_fn(orte_buffer_t *buffer) -{ - orte_std_cntr_t i, j; - orte_ns_replica_dti_t **ptr; - char tmp_out[NS_REPLICA_MAX_STRING_SIZE], *tmp; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - tmp = tmp_out; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Dump of Name Service Datatype Tracker\n"); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - ptr = (orte_ns_replica_dti_t**)(orte_ns_replica.dts)->addr; - for (i=0, j=0; j < orte_ns_replica.num_dts && - i < 
(orte_ns_replica.dts)->size; i++) { - if (NULL != ptr[i]) { - j++; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Num: %lu\tDatatype id: %lu\tName: %s\n", - (unsigned long)j, (unsigned long)ptr[i]->id, ptr[i]->name); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - } - } - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/ns/replica/ns_replica_general_fns.c b/orte/mca/ns/replica/ns_replica_general_fns.c deleted file mode 100644 index afe8d64d82..0000000000 --- a/orte/mca/ns/replica/ns_replica_general_fns.c +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ -#include "orte_config.h" - -#include -#include - -#include "opal/threads/mutex.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/ns_private.h" -#include "ns_replica.h" - - -/*** GET PEERS ***/ -int orte_ns_replica_get_peers(orte_process_name_t **procs, - orte_std_cntr_t *num_procs, opal_list_t *attrs) -{ - orte_std_cntr_t i, isave, npeers; - orte_jobid_t *jptr; - orte_attribute_t *attr; - orte_ns_replica_jobitem_t *job_info, *child; - opal_list_item_t *item; - opal_list_t peerlist; - int rc; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - /* set default value */ - *procs = NULL; - *num_procs = 0; - - /* check the attributes to see if USE_JOB has been set. If not, then this is - * a request for my own job peers - process that one locally - */ - - if (NULL == (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_JOBID))) { - /* get my own job peers */ - *procs = (orte_process_name_t*)malloc(orte_process_info.num_procs * sizeof(orte_process_name_t)); - if (NULL == *procs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - for (i=0; i < orte_process_info.num_procs; i++) { - (*procs)[i].jobid = ORTE_PROC_MY_NAME->jobid; - (*procs)[i].vpid = i; - } - - *num_procs = orte_process_info.num_procs; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - - /* we get here if the job attribute was passed to us - use that jobid */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - - /* look up this job's record on the tracking database */ - if (NULL == (job_info = orte_ns_replica_find_job(*jptr))) { - 
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; - } - - if (NULL != (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_INCLUDE_DESCENDANTS))) { - /* we want the peers from this job AND ALL of its descendants - start by constructing - * a flattened list of the descendant jobs - */ - OBJ_CONSTRUCT(&peerlist, opal_list_t); - child = OBJ_NEW(orte_ns_replica_jobitem_t); - child->jobid = job_info->jobid; - child->next_vpid = job_info->next_vpid; - opal_list_append(&peerlist, &child->super); /* add the current job to the list */ - orte_ns_replica_construct_flattened_tree(&peerlist, job_info); - - i = opal_list_get_size(&peerlist); - npeers = 0; - if (0 < i) { - for (item = opal_list_get_first(&peerlist); - item != opal_list_get_end(&peerlist); - item = opal_list_get_next(item)) { - child = (orte_ns_replica_jobitem_t*)item; - npeers += child->next_vpid; - } - if (0 >= npeers) { - *num_procs = npeers; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - - *procs = (orte_process_name_t*)malloc(npeers * sizeof(orte_process_name_t)); - if (NULL == *procs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* populate it from the list */ - isave = 0; - while (NULL != (item = opal_list_remove_first(&peerlist))) { - child = (orte_ns_replica_jobitem_t*)item; - for (i=0; i < child->next_vpid; i++) { - (*procs)[i+isave].jobid = child->jobid; - (*procs)[i+isave].vpid = i; - } - isave += child->next_vpid; - } - } - *num_procs = npeers; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - - if (NULL != (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_INCLUDE_CHILDREN))) { - /* we want the peers from this job AND ONLY its immediate children */ - - /* determine the number of peers we are going to have */ - npeers = job_info->next_vpid; - for (item = opal_list_get_first(&job_info->children); - item != 
opal_list_get_end(&job_info->children); - item = opal_list_get_next(item)) { - child = (orte_ns_replica_jobitem_t*)item; - npeers += child->next_vpid; - } - - /* create the array */ - if (0 < npeers) { - *procs = (orte_process_name_t*)malloc(npeers * sizeof(orte_process_name_t)); - if (NULL == *procs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* populate it, starting with the specified job followed by its children */ - for (i=0; i < job_info->next_vpid; i++) { - (*procs)[i].jobid = *jptr; - (*procs)[i].vpid = i; - } - isave = job_info->next_vpid; - for (item = opal_list_get_first(&job_info->children); - item != opal_list_get_end(&job_info->children); - item = opal_list_get_next(item)) { - child = (orte_ns_replica_jobitem_t*)item; - for (i=0; i < child->next_vpid; i++) { - (*procs)[i+isave].jobid = child->jobid; - (*procs)[i+isave].vpid = i; - } - isave += child->next_vpid; - } - } - *num_procs = npeers; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - - /* get here if we want just the peers for the specified job */ - - /* create the array of peers */ - if (0 < job_info->next_vpid) { - *procs = (orte_process_name_t*)malloc(job_info->next_vpid * sizeof(orte_process_name_t)); - if (NULL == *procs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - for (i=0; i < job_info->next_vpid; i++) { - (*procs)[i].jobid = *jptr; - (*procs)[i].vpid = i; - } - } - - *num_procs = job_info->next_vpid; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - - - -/* - * TAG SERVER functions - */ -int orte_ns_replica_assign_rml_tag(orte_rml_tag_t *tag, - char *name) -{ - orte_ns_replica_tagitem_t *tagitem, **tags; - orte_std_cntr_t i; - orte_rml_tag_t j; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - if (NULL != name) { - /* see if this name is already in 
list - if so, return tag */ - tags = (orte_ns_replica_tagitem_t**)orte_ns_replica.tags->addr; - for (i=0, j=0; j < orte_ns_replica.num_tags && - i < (orte_ns_replica.tags)->size; i++) { - if (NULL != tags[i]) { - j++; - if (tags[i]->name != NULL && - 0 == strcmp(name, tags[i]->name)) { /* found name on list */ - *tag = tags[i]->tag; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - } - } - } - - /* not in list or not provided, so allocate next tag */ - *tag = ORTE_RML_TAG_MAX; - - /* check if tag is available - need to do this since the tag type - * is probably not going to be a orte_std_cntr_t, so we cannot just rely - * on the pointer_array's size limits to protect us. NOTE: need to - * reserve ORTE_RML_TAG_MAX as an invalid value, so can't let - * num_tags get there - */ - if (ORTE_RML_TAG_MAX-2 < orte_ns_replica.num_tags) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - tagitem = OBJ_NEW(orte_ns_replica_tagitem_t); - if (NULL == tagitem) { /* out of memory */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&i, - orte_ns_replica.tags, tagitem))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - tagitem->tag = orte_ns_replica.num_tags + ORTE_RML_TAG_DYNAMIC; - (orte_ns_replica.num_tags)++; - if (NULL != name) { /* provided - can look it up later */ - tagitem->name = strdup(name); - } else { - tagitem->name = NULL; - } - - *tag = tagitem->tag; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - - -/* - * DATA TYPE SERVER functions - */ -int orte_ns_replica_define_data_type(const char *name, - orte_data_type_t *type) -{ - orte_ns_replica_dti_t **dti, *dtip; - orte_std_cntr_t i, j; - int rc; - - if (NULL == name || 0 < *type) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - 
return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - dti = (orte_ns_replica_dti_t**)orte_ns_replica.dts->addr; - for (i=0, j=0; j < orte_ns_replica.num_dts && - i < orte_ns_replica.dts->size; i++) { - if (NULL != dti[i]) { - j++; - if (dti[i]->name != NULL && - 0 == strcmp(name, dti[i]->name)) { /* found name on list */ - *type = dti[i]->id; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - } - } - - /* not in list or not provided, so allocate next id */ - *type = ORTE_DSS_ID_MAX; - - /* check if id is available - need to do this since the data type - * is probably not going to be a orte_std_cntr_t, so we cannot just rely - * on the pointer_array's size limits to protect us. - */ - if (ORTE_DSS_ID_MAX-2 < orte_ns_replica.num_dts) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - dtip = OBJ_NEW(orte_ns_replica_dti_t); - if (NULL == dtip) { /* out of memory */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - dtip->name = strdup(name); - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&i, - orte_ns_replica.dts, dtip))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - dtip->id = orte_ns_replica.num_dts; - (orte_ns_replica.num_dts)++; - - *type = dtip->id; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - -/* - * NAME functions - */ -int orte_ns_replica_create_my_name(void) -{ - orte_jobid_t jobid; - orte_vpid_t vpid; - opal_list_t attrs; - int rc; - - OBJ_CONSTRUCT(&attrs, opal_list_t); - if (ORTE_SUCCESS != (rc = orte_ns.create_jobid(&jobid, &attrs))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attrs); - return rc; - } - OBJ_DESTRUCT(&attrs); - - if (ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, 1, &vpid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = 
orte_ns.create_process_name(&(orte_process_info.my_name), jobid, vpid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/ns/replica/ns_replica_job_fns.c b/orte/mca/ns/replica/ns_replica_job_fns.c deleted file mode 100644 index c518776b21..0000000000 --- a/orte/mca/ns/replica/ns_replica_job_fns.c +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ -#include "orte_config.h" - -#include -#include - -#include "opal/threads/mutex.h" -#include "opal/util/output.h" -#include "opal/util/show_help.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/gpr/gpr.h" - -#include "ns_replica.h" - -/* - * JOBID functions - */ -int orte_ns_replica_create_jobid(orte_jobid_t *jobid, opal_list_t *attrs) -{ - orte_ns_replica_jobitem_t *child, *parent, *root; - orte_jobid_t parent_job=ORTE_JOBID_INVALID, *jptr; - orte_attribute_t *attr; - int rc; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - *jobid = ORTE_JOBID_INVALID; - - /* is a jobid available, or are we at the max? 
*/ - if (ORTE_JOBID_MAX == orte_ns_replica.num_jobids) { - /* at max - alert user to situation */ - opal_show_help("help-ns-replica.txt", "out-of-jobids", true); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* check for attributes */ - if (NULL != (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_PARENT))) { - /* declares the specified jobid to be the parent of the new one */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - parent_job = *jptr; - } else if (NULL != (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_ROOT))) { - /* use the root of the specified job as the parent of the new one */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL == (root = orte_ns_replica_find_root_job(*jptr))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - parent_job = root->jobid; - } - - /* if the parent jobid is INVALID, then this is the root of a new - * job family - create it - */ - if (ORTE_JOBID_INVALID == parent_job) { - root = OBJ_NEW(orte_ns_replica_jobitem_t); - if (NULL == root) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - root->jobid = orte_ns_replica.num_jobids; - opal_list_append(&orte_ns_replica.jobs, &root->super); - *jobid = root->jobid; - (orte_ns_replica.num_jobids)++; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - - /* if the parent jobid is not INVALID, then the request is for a - * new child for this parent. 
Find the job's record - */ - if (NULL == (parent = orte_ns_replica_find_job(parent_job))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; - } - - /* add this new job to the parent's list of children */ - child = OBJ_NEW(orte_ns_replica_jobitem_t); - if (NULL == child) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - opal_list_append(&parent->children, &child->super); - child->jobid = orte_ns_replica.num_jobids; - *jobid = child->jobid; - (orte_ns_replica.num_jobids)++; - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - - -int orte_ns_replica_get_job_descendants(orte_jobid_t **descendants, orte_std_cntr_t *num_desc, orte_jobid_t job) -{ - orte_std_cntr_t i, num; - orte_ns_replica_jobitem_t *ptr, *newptr; - orte_jobid_t *descs; - opal_list_t desc_list; - opal_list_item_t *item; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - /* default values */ - *descendants = NULL; - *num_desc = 0; - - /* find this job's record on the tree */ - if (NULL == (ptr = orte_ns_replica_find_job(job))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; - } - - /* construct a flattened list of its descendants - including ourself */ - OBJ_CONSTRUCT(&desc_list, opal_list_t); - newptr = OBJ_NEW(orte_ns_replica_jobitem_t); - newptr->jobid = job; - opal_list_append(&desc_list, &newptr->super); - - orte_ns_replica_construct_flattened_tree(&desc_list, ptr); - - /* count number of entries */ - num = opal_list_get_size(&desc_list); - - /* allocate memory for the array */ - descs = (orte_jobid_t*)malloc(num * sizeof(orte_jobid_t)); - if (NULL == descs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* now fill in the array */ - i = 0; - while (NULL != (item = 
opal_list_remove_first(&desc_list))) { - ptr = (orte_ns_replica_jobitem_t*)item; - descs[i++] = ptr->jobid; - OBJ_RELEASE(ptr); - } - OBJ_DESTRUCT(&desc_list); - - *descendants = descs; - *num_desc = num; - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - -int orte_ns_replica_get_job_children(orte_jobid_t **children, orte_std_cntr_t *num_childs, orte_jobid_t job) -{ - orte_std_cntr_t i, num; - orte_ns_replica_jobitem_t *ptr, *newptr; - orte_jobid_t *descs; - opal_list_item_t *item; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - /* default values */ - *children = NULL; - *num_childs = 0; - - /* find this job's record on the tree */ - if (NULL == (ptr = orte_ns_replica_find_job(job))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; - } - - /* count number of entries in our direct children - include ourselves */ - num = 1 + opal_list_get_size(&ptr->children); - - /* allocate memory for the array */ - descs = (orte_jobid_t*)malloc(num * sizeof(orte_jobid_t)); - if (NULL == descs) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* now fill in the array - put ourselves first */ - descs[0] = job; - i = 1; - for (item = opal_list_get_first(&ptr->children); - item != opal_list_get_end(&ptr->children); - item = opal_list_get_next(item)) { - newptr = (orte_ns_replica_jobitem_t*)item; - descs[i++] = newptr->jobid; - } - - *children = descs; - *num_childs = num; - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - -int orte_ns_replica_get_root_job(orte_jobid_t *root_job, orte_jobid_t job) -{ - orte_ns_replica_jobitem_t *root; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - if (NULL == (root = orte_ns_replica_find_root_job(job))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; - } - - *root_job = root->jobid; 
- OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - - -int orte_ns_replica_get_parent_job(orte_jobid_t *parent_job, orte_jobid_t job) -{ - opal_list_item_t *item; - orte_ns_replica_jobitem_t *root, *ptr, *parent; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - /* find this job's parent object */ - for (item = opal_list_get_first(&orte_ns_replica.jobs); - item != opal_list_get_end(&orte_ns_replica.jobs); - item = opal_list_get_next(item)) { - root = (orte_ns_replica_jobitem_t*)item; - parent = root; - if (NULL != (ptr = down_search(root, &parent, job))) { - goto REPORT; - } - } - /* don't report an error if not found, just return invalid */ - *parent_job = ORTE_JOBID_INVALID; - return ORTE_ERR_NOT_FOUND; - -REPORT: - /* return the info */ - *parent_job = parent->jobid; - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - - -int orte_ns_replica_get_job_family(orte_jobid_t **family, orte_std_cntr_t *num_members, orte_jobid_t job) -{ - orte_jobid_t root; - int rc; - - if (ORTE_SUCCESS != (rc = orte_ns_replica_get_root_job(&root, job))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_get_job_descendants(family, num_members, job))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - -int orte_ns_replica_reserve_range(orte_jobid_t job, orte_vpid_t range, - orte_vpid_t *start) -{ - orte_ns_replica_jobitem_t *ptr; - orte_gpr_value_t *value; - char *segment; - int rc; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - /* find the job's record */ - if (NULL == (ptr = orte_ns_replica_find_job(job))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; - } - - if ((ORTE_VPID_MAX-range-(ptr->next_vpid)) > 0) { - *start = ptr->next_vpid; - ptr->next_vpid += range; - /* now store it in the registry */ - if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) { - 
ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_OVERWRITE, segment, 1, 1))) { - ORTE_ERROR_LOG(rc); - free(segment); - return rc; - } - free(segment); - value->tokens[0] = strdup(ORTE_JOB_GLOBALS); - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_JOB_VPID_RANGE_KEY, ORTE_VPID, &ptr->next_vpid))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) { - ORTE_ERROR_LOG(rc); - } - OBJ_RELEASE(value); - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - - /* get here if the range isn't available - alert user */ - opal_show_help("help-ns-replica.txt", "out-of-vpids", true); - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; -} - -int orte_ns_replica_get_vpid_range(orte_jobid_t job, orte_vpid_t *range) -{ - orte_ns_replica_jobitem_t *ptr; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - /* find the job's record */ - if (NULL == (ptr = orte_ns_replica_find_job(job))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; - } - - *range = ptr->next_vpid; - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - diff --git a/orte/mca/ns/replica/ns_replica_recv.c b/orte/mca/ns/replica/ns_replica_recv.c deleted file mode 100644 index 6ce799141f..0000000000 --- a/orte/mca/ns/replica/ns_replica_recv.c +++ /dev/null @@ -1,438 +0,0 @@ -/* -*- C -*- -* -* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -* University Research and Technology -* Corporation. All rights reserved. -* Copyright (c) 2004-2005 The University of Tennessee and The University -* of Tennessee Research Foundation. All rights -* reserved. -* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -* University of Stuttgart. All rights reserved. 
-* Copyright (c) 2004-2005 The Regents of the University of California. -* All rights reserved. -* $COPYRIGHT$ -* -* Additional copyrights may follow -* -* $HEADER$ -*/ -/** @file: -* -* The Open MPI Name Server -* -*/ - -/* - * includes - */ -#include "orte_config.h" - -#include "orte/orte_constants.h" -#include "orte/orte_types.h" - -#include "opal/threads/mutex.h" -#include "opal/class/opal_list.h" -#include "opal/util/output.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/ns/base/ns_private.h" -#include "ns_replica.h" - - -/* - * handle message from proxies - * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. - * DO NOT RELEASE THIS BUFFER IN THIS CODE - */ - -void orte_ns_replica_recv(int status, orte_process_name_t* sender, - orte_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_buffer_t answer, error_answer; - orte_ns_cmd_flag_t command; - opal_list_t attrs; - orte_jobid_t job, root, *descendants; - orte_vpid_t startvpid, range; - char *tagname; - orte_rml_tag_t oob_tag; - orte_data_type_t type; - orte_std_cntr_t count, nprocs, nret; - orte_process_name_t *procs; - int rc=ORTE_SUCCESS; - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - rc = ORTE_ERR_BAD_PARAM; - goto RETURN_ERROR; - } - - OBJ_CONSTRUCT(&answer, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - switch (command) { - case ORTE_NS_CREATE_NODEID_CMD: - case ORTE_NS_GET_NODE_INFO_CMD: - ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); - goto RETURN_ERROR; - break; - - case ORTE_NS_CREATE_JOBID_CMD: - /* get the list of attributes */ - OBJ_CONSTRUCT(&attrs, opal_list_t); - count = 1; - if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &attrs, 
&count, ORTE_ATTR_LIST))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_create_jobid(&job, &attrs))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attrs); - goto RETURN_ERROR; - } - OBJ_DESTRUCT(&attrs); - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&job, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_GET_JOB_DESC_CMD: - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, (void*)&job, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_get_job_descendants(&descendants, &nret, job))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&nret, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (0 < nret) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)descendants, nret, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - } - - if (0 > (rc = orte_rml.send_buffer(sender, &answer, tag, 0))) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_GET_JOB_CHILD_CMD: - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, (void*)&job, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_get_job_children(&descendants, &nret, job))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&nret, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (0 < nret) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)descendants, nret, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - } - - if (0 > (rc = orte_rml.send_buffer(sender, &answer, tag, 0))) { - 
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_GET_ROOT_JOB_CMD: - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, (void*)&job, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_get_root_job(&root, job))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&root, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (0 > (rc = orte_rml.send_buffer(sender, &answer, tag, 0))) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_GET_PARENT_JOB_CMD: - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, (void*)&job, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_get_parent_job(&root, job))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&root, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (0 > (rc = orte_rml.send_buffer(sender, &answer, tag, 0))) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_GET_JOB_FAMILY_CMD: - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, (void*)&job, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_get_job_family(&descendants, &nret, job))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&nret, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (0 < nret) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)descendants, nret, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - } - - if (0 > (rc = orte_rml.send_buffer(sender, &answer, tag, 0))) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } 
- break; - - case ORTE_NS_RESERVE_RANGE_CMD: - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, (void*)&job, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, (void*)&range, &count, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_reserve_range(job, range, &startvpid))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&startvpid, 1, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (0 > (rc = orte_rml.send_buffer(sender, &answer, tag, 0))) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_GET_VPID_RANGE_CMD: - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, (void*)&job, &count, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_get_vpid_range(job, &range))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&range, 1, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (0 > (rc = orte_rml.send_buffer(sender, &answer, tag, 0))) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_ASSIGN_OOB_TAG_CMD: - count = 1; - if (0 > orte_dss.unpack(buffer, &tagname, &count, ORTE_STRING)) { - rc = ORTE_ERR_UNPACK_FAILURE; - goto RETURN_ERROR; - } - - if (0 == strncmp(tagname, "NULL", 4)) { - if (ORTE_SUCCESS != (rc = orte_ns_replica_assign_rml_tag(&oob_tag, NULL))) { - goto RETURN_ERROR; - } - } else { - if (ORTE_SUCCESS != (rc = orte_ns_replica_assign_rml_tag(&oob_tag, tagname))) { - goto RETURN_ERROR; - } - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&oob_tag, 1, ORTE_RML_TAG))) { - goto RETURN_ERROR; - } - - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - 
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_DEFINE_DATA_TYPE_CMD: - count = 1; - if (0 > orte_dss.unpack(buffer, &tagname, &count, ORTE_STRING)) { - rc = ORTE_ERR_UNPACK_FAILURE; - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_ns_replica_define_data_type(tagname, &type))) { - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, (void*)&type, 1, ORTE_DATA_TYPE))) { - goto RETURN_ERROR; - } - - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_GET_PEERS_CMD: - /* get the list of attributes */ - OBJ_CONSTRUCT(&attrs, opal_list_t); - count = 1; - if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &attrs, &count, ORTE_ATTR_LIST))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - /* process the request */ - if (ORTE_SUCCESS != (rc = orte_ns_replica_get_peers(&procs, &nprocs, &attrs))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&attrs); - goto RETURN_ERROR; - } - OBJ_DESTRUCT(&attrs); - - /* pack the answer */ - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &nprocs, 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - - if (nprocs > 0) { - if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, procs, nprocs, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - } - - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_DUMP_JOBIDS_CMD: - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_jobs_fn(&answer))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_DUMP_TAGS_CMD: - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_tags_fn(&answer))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - if (0 > orte_rml.send_buffer(sender, &answer, 
tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_DUMP_DATATYPES_CMD: - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_datatypes_fn(&answer))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - default: - goto RETURN_ERROR; - } - goto CLEANUP; - -RETURN_ERROR: - OBJ_CONSTRUCT(&error_answer, orte_buffer_t); - orte_dss.pack(&error_answer, (void*)&command, 1, ORTE_NS_CMD); - orte_dss.pack(&error_answer, (void*)&rc, 1, ORTE_INT32); - orte_rml.send_buffer(sender, &error_answer, tag, 0); - OBJ_DESTRUCT(&error_answer); - -CLEANUP: - /* cleanup */ - OBJ_DESTRUCT(&answer); -} - diff --git a/orte/mca/ns/replica/ns_replica_support_fns.c b/orte/mca/ns/replica/ns_replica_support_fns.c deleted file mode 100644 index fa2c2da7fc..0000000000 --- a/orte/mca/ns/replica/ns_replica_support_fns.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ -#include "orte_config.h" - -#include -#include - -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "ns_replica.h" - -orte_ns_replica_jobitem_t *down_search(orte_ns_replica_jobitem_t *ptr, - orte_ns_replica_jobitem_t **parent_ptr, - orte_jobid_t job) -{ - opal_list_item_t *item; - orte_ns_replica_jobitem_t *ptr2, *ptr3; - - /* check if this is the specified job */ - if (ptr->jobid == job) { - return ptr; - } - - /* otherwise, look at the children of this ptr. call ourselves - * to check each one - */ - for (item = opal_list_get_first(&ptr->children); - item != opal_list_get_end(&ptr->children); - item = opal_list_get_next(item)) { - ptr2 = (orte_ns_replica_jobitem_t*)item; - *parent_ptr = ptr; - if (NULL != (ptr3 = down_search(ptr2, parent_ptr, job))) { - return ptr3; - } - } - - return NULL; -} - -/* find a job's record, wherever it is on the tree */ -orte_ns_replica_jobitem_t* orte_ns_replica_find_job(orte_jobid_t job) -{ - opal_list_item_t *item; - orte_ns_replica_jobitem_t *root, *ptr, *parent; - - for (item = opal_list_get_first(&orte_ns_replica.jobs); - item != opal_list_get_end(&orte_ns_replica.jobs); - item = opal_list_get_next(item)) { - root = (orte_ns_replica_jobitem_t*)item; - if (NULL != (ptr = down_search(root, &parent, job))) { - return ptr; - } - } - - /* don't report an error if not found, just return NULL */ - return NULL; -} - -/* given a jobid, find it's root job's object */ -orte_ns_replica_jobitem_t* orte_ns_replica_find_root_job(orte_jobid_t job) -{ - opal_list_item_t *item; - orte_ns_replica_jobitem_t *root, *ptr, *parent; - - for (item = opal_list_get_first(&orte_ns_replica.jobs); - item != opal_list_get_end(&orte_ns_replica.jobs); - item = opal_list_get_next(item)) { - root = (orte_ns_replica_jobitem_t*)item; - - if (NULL != (ptr = down_search(root, &parent, job))) { - return 
root; - } - } - - /* don't report an error if not found, just return NULL */ - return NULL; -} - -/* given a job's record, construct a flattened list of the descendants below it, - * including the starting point - */ -void orte_ns_replica_construct_flattened_tree(opal_list_t *tree, orte_ns_replica_jobitem_t *ptr) -{ - orte_ns_replica_jobitem_t *job, *newjob; - opal_list_item_t *item; - - for (item = opal_list_get_first(&ptr->children); - item != opal_list_get_end(&ptr->children); - item = opal_list_get_next(item)) { - job = (orte_ns_replica_jobitem_t*)item; - - newjob = OBJ_NEW(orte_ns_replica_jobitem_t); - newjob->jobid = job->jobid; - newjob->next_vpid = job->next_vpid; - opal_list_append(tree, &newjob->super); - - orte_ns_replica_construct_flattened_tree(tree, job); /* get anyone below this one */ - } -} diff --git a/orte/mca/odls/base/Makefile.am b/orte/mca/odls/base/Makefile.am index 39a13175e7..2edd87f1a7 100644 --- a/orte/mca/odls/base/Makefile.am +++ b/orte/mca/odls/base/Makefile.am @@ -27,12 +27,4 @@ libmca_odls_la_SOURCES += \ base/odls_base_default_fns.c \ base/odls_base_open.c \ base/odls_base_select.c \ - base/odls_base_state.c \ - base/odls_base_purge_params.c \ - base/data_type_support/odls_compare_fns.c \ - base/data_type_support/odls_copy_fns.c \ - base/data_type_support/odls_packing_fns.c \ - base/data_type_support/odls_print_fns.c \ - base/data_type_support/odls_release_fns.c \ - base/data_type_support/odls_size_fns.c \ - base/data_type_support/odls_unpacking_fns.c + base/odls_base_state.c diff --git a/orte/mca/odls/base/data_type_support/odls_compare_fns.c b/orte/mca/odls/base/data_type_support/odls_compare_fns.c deleted file mode 100755 index 1edce27360..0000000000 --- a/orte/mca/odls/base/data_type_support/odls_compare_fns.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "orte/mca/odls/base/odls_private.h" - -/* ORTE_DAEMON_CMD */ -int orte_odls_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_flag_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - diff --git a/orte/mca/odls/base/data_type_support/odls_copy_fns.c b/orte/mca/odls/base/data_type_support/odls_copy_fns.c deleted file mode 100755 index 030bebfba9..0000000000 --- a/orte/mca/odls/base/data_type_support/odls_copy_fns.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/odls/base/odls_private.h" - -int orte_odls_copy_daemon_cmd(orte_daemon_cmd_flag_t **dest, orte_daemon_cmd_flag_t *src, orte_data_type_t type) -{ - size_t datasize; - - datasize = sizeof(orte_daemon_cmd_flag_t); - - *dest = (orte_daemon_cmd_flag_t*)malloc(datasize); - if (NULL == *dest) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - memcpy(*dest, src, datasize); - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/odls/base/data_type_support/odls_packing_fns.c b/orte/mca/odls/base/data_type_support/odls_packing_fns.c deleted file mode 100644 index b9bd4fa94e..0000000000 --- a/orte/mca/odls/base/data_type_support/odls_packing_fns.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss_internal.h" - -#include "orte/mca/odls/base/odls_private.h" - -/* - * ORTE_DAEMON_CMD - */ -int orte_odls_pack_daemon_cmd(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, - orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_DAEMON_CMD_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - diff --git a/orte/mca/odls/base/data_type_support/odls_print_fns.c b/orte/mca/odls/base/data_type_support/odls_print_fns.c deleted file mode 100755 index ccc4d779d1..0000000000 --- a/orte/mca/odls/base/data_type_support/odls_print_fns.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/odls/base/odls_private.h" - -/* - * ORTE_DAEMON_CMD - */ -int orte_odls_print_daemon_cmd(char **output, char *prefix, orte_daemon_cmd_flag_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_DAEMON_CMD\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_DAEMON_CMD\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/odls/base/data_type_support/odls_release_fns.c b/orte/mca/odls/base/data_type_support/odls_release_fns.c deleted file mode 100644 index e7e8f7775b..0000000000 --- a/orte/mca/odls/base/data_type_support/odls_release_fns.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "orte/dss/dss_types.h" - -#include "orte/mca/odls/base/odls_private.h" - -/* - * STANDARD RELEASE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ -void orte_odls_std_release(orte_data_value_t *value) -{ - free(value->data); - value->data = NULL; -} diff --git a/orte/mca/odls/base/data_type_support/odls_size_fns.c b/orte/mca/odls/base/data_type_support/odls_size_fns.c deleted file mode 100755 index bbcc7e4763..0000000000 --- a/orte/mca/odls/base/data_type_support/odls_size_fns.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "orte/mca/odls/base/odls_private.h" - -/* - * STANDARD SIZE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ -int orte_odls_size_daemon_cmd(size_t *size, orte_daemon_cmd_flag_t *src, orte_data_type_t type) -{ - *size = sizeof(orte_daemon_cmd_flag_t); - - return ORTE_SUCCESS; -} diff --git a/orte/mca/odls/base/data_type_support/odls_unpacking_fns.c b/orte/mca/odls/base/data_type_support/odls_unpacking_fns.c deleted file mode 100644 index c45fedadb7..0000000000 --- a/orte/mca/odls/base/data_type_support/odls_unpacking_fns.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/dss/dss_types.h" -#include "orte/dss/dss_internal.h" - -#include "orte/mca/odls/base/odls_private.h" - -/* - * ORTE_DAEMON_CMD - */ -int orte_odls_unpack_daemon_cmd(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int ret; - - /* turn around and unpack the real type */ - ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_DAEMON_CMD_T); - - return ret; -} - diff --git a/orte/mca/odls/base/odls_base_close.c b/orte/mca/odls/base/odls_base_close.c index 4b54c32165..becf7ec848 100644 --- a/orte/mca/odls/base/odls_base_close.c +++ b/orte/mca/odls/base/odls_base_close.c @@ -17,10 +17,10 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include -#include "orte/orte_constants.h" #include "opal/util/trace.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 454ff8021a..e38ce7d59e 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -19,6 +19,8 @@ #include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" #ifdef HAVE_SYS_WAIT_H #include @@ -29,7 +31,6 @@ #endif /* HAVE_SYS_STAT_H */ #include -#include "orte/orte_constants.h" #include "opal/util/opal_environ.h" #include "opal/util/argv.h" #include "opal/util/os_path.h" @@ -37,22 +38,20 @@ #include "opal/util/sys_limits.h" #include 
"opal/util/show_help.h" -#include "orte/dss/dss.h" +#include "opal/dss/dss.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/schema/schema.h" -#include "orte/mca/smr/smr.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/routed/routed.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/rmgr/rmgr.h" #include "orte/mca/iof/iof.h" #include "orte/mca/iof/base/iof_base_setup.h" -#include "orte/mca/ras/base/ras_private.h" -#include "orte/mca/sds/base/base.h" +#include "orte/mca/ess/base/base.h" + +#include "orte/util/context_fns.h" +#include "orte/util/name_fns.h" #include "orte/util/session_dir.h" #include "orte/util/sys_info.h" -#include "orte/util/univ_info.h" -#include "orte/runtime/params.h" +#include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #if OPAL_ENABLE_FT == 1 @@ -267,243 +266,154 @@ static int slot_list_to_cpu_set(char *slot_str, orte_odls_child_t *child) } -int orte_odls_base_default_get_add_procs_data(orte_gpr_notify_data_t **data, - orte_job_map_t *map) +/* IT IS CRITICAL THAT ANY CHANGE IN THE ORDER OF THE INFO PACKED IN + * THIS FUNCTION BE REFLECTED IN THE CONSTRUCT_CHILD_LIST PARSER BELOW +*/ +int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, + orte_jobid_t job) { - orte_gpr_notify_data_t *ndat; - orte_gpr_value_t *value; - opal_list_item_t *item, *m_item; - orte_mapped_node_t *node; - orte_mapped_proc_t *proc; + orte_node_t **nodes, *node; + orte_proc_t **procs, *proc; int rc; - int posn; - orte_std_cntr_t num_kvs; - orte_app_context_t **app_contexts; - orte_std_cntr_t i, num_contexts, total_slots_alloc; - orte_vpid_t range; - bool override; - - /* set default answer */ - *data = NULL; - - ndat = OBJ_NEW(orte_gpr_notify_data_t); - if (NULL == ndat) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* construct a fake trigger name so that the we can extract the jobid from it later */ - if 
(ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(ndat->target), "bogus", map->job))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* get the vpid range */ - if (ORTE_SUCCESS != (rc = orte_ns.get_vpid_range(map->job, &range))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* get the total slots allocated to us */ - if (ORTE_SUCCESS != (rc = orte_rmgr.get_universe_size(map->job, &total_slots_alloc))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* get the override_oversubscribed flag */ - if (ORTE_SUCCESS != (rc = orte_ras_base_get_oversubscribe_override(map->job, &override))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* get the app_context array */ - if (ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(map->job, &app_contexts, &num_contexts))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* compute number of keyvals required */ - num_kvs = 3 + num_contexts; - - /* create the value object - don't need tokens or segment name */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, 0, NULL, num_kvs, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* store the range - ORTE_VPID_KEY */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), - NULL, - ORTE_VPID, &range))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - /* store the total slots allocated - ORTE_JOB_TOTAL_SLOTS_ALLOC_KEY */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), - NULL, - ORTE_STD_CNTR, &total_slots_alloc))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - /* store the oversubscribe override flag - ORTE_JOB_OVERSUBSCRIBE_OVERRIDE_KEY */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), - NULL, - ORTE_BOOL, &override))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } + orte_job_t 
*jdata; + orte_job_map_t *map; + orte_std_cntr_t i; + orte_vpid_t j; + orte_vpid_t invalid_vpid=ORTE_VPID_INVALID; - /* for each context, store it - ORTE_JOB_APP_CONTEXT_KEY */ - for (i=0; i < num_contexts; i++) { - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[i+3]), - NULL, - ORTE_APP_CONTEXT, app_contexts[i]))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } + /* get the job data pointer */ + if (NULL == (jdata = orte_get_job_data_object(job))) { + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return ORTE_ERR_BAD_PARAM; } - /* add the data to the notify_data object */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&i, ndat->values, value))) { + /* pack the jobid so it can be extracted later */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &job, 1, ORTE_JOBID))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); return rc; } - ndat->cnt = 1; - /* the remainder of our required info is in the mapped_node objects, so all we - * have to do is transfer it over + /* pack the number of procs in the job - equates to the vpid range for the job */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->num_procs, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* pack the total slots allocated to us */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->total_slots_alloc, 1, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* pack the override_oversubscribed flag */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->oversubscribe_override, 1, OPAL_BOOL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* pack the number of app_contexts for this job */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->num_apps, 1, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* pack the app_contexts for this job - we already checked early on that + * there must be at least one, so don't bother checking here again */ - for (m_item = opal_list_get_first(&map->nodes); - 
m_item != opal_list_get_end(&map->nodes); - m_item = opal_list_get_next(m_item)) { - node = (orte_mapped_node_t*)m_item; - - /* determine the number of keyvals we need for this node */ - num_kvs = 3 + 4*node->num_procs; - - /* create the value object - don't need tokens or segment name */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, 0, NULL, num_kvs, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* IT IS CRITICAL THAT ANY CHANGE IN THE ORDER OF THE INFO IN THESE KEYVALS - * BE REFLECTED IN THE CONSTRUCT_CHILD_LIST PARSER BELOW - */ - - /* store the node-specific data */ - /* ORTE_VPID_KEY*/ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), - NULL, - ORTE_VPID, &(node->daemon->vpid)))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - /* ORTE_NODE_NUM_PROCS_KEY */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), - NULL, - ORTE_STD_CNTR, &node->num_procs))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - /* ORTE_NODE_OVERSUBSCRIBED_KEY */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), - NULL, - ORTE_BOOL, &node->oversubscribed))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - /* cycle through and add the proc-specific data */ - posn = 3; - for (item = opal_list_get_first(&node->procs); - item != opal_list_get_end(&node->procs); - item = opal_list_get_next(item)) { - proc = (orte_mapped_proc_t*)item; - - /* ORTE_VPID_KEY */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[posn++]), - NULL, - ORTE_VPID, &(proc->name.vpid)))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - /* ORTE_PROC_APP_CONTEXT_KEY */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[posn++]), - NULL, - ORTE_STD_CNTR, &proc->app_idx))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - 
OBJ_RELEASE(value); - return rc; - } - - /* ORTE_PROC_LOCAL_RANK_KEY */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[posn++]), - NULL, - ORTE_VPID, &proc->local_rank))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - /* ORTE_PROC_CPU_LIST_KEY */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[posn++]), - NULL, - ORTE_STRING, proc->slot_list))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - } - - /* add this node's data to the notify_data object */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&i, ndat->values, value))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - ndat->cnt += 1; + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, jdata->apps->addr, jdata->num_apps, ORTE_APP_CONTEXT))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* the remainder of our required info is in the node objects in this job's map, + * so pickup a pointer to that map + */ + map = jdata->map; + + /* pack the number of nodes participating in this launch */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &map->num_nodes, 1, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* cycle through the participating nodes */ + nodes = (orte_node_t**)map->nodes->addr; + for (i=0; i < map->num_nodes; i++) { + node = nodes[i]; + /* PACK NODE-SPECIFIC DATA */ + /* pack the vpid of the daemon on this node - this will be + * later used to tell the daemon it has something to do + */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &(node->daemon->name.vpid), 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* pack the number of procs on this node */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &node->num_procs, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* pack the oversubscribed flag for the node */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &node->oversubscribed, 1, OPAL_BOOL))) { + 
ORTE_ERROR_LOG(rc); + return rc; + } + + /* PACK THE PROC-SPECIFIC DATA FOR THE PROCS **TO BE LAUNCHED** ON THIS NODE + * FOR THIS JOB + * + * NOTE: The nodes object contains info on ALL procs on the node, not just those + * to be launched for the specified job. Thus, we must take care to CLEARLY + * demarcate info on those procs to be launched, or else we will get + * duplicate processes!! + */ + /* we already packed the number of procs on the node, so cycle + * through them and pack each one's launch data + */ + procs = (orte_proc_t**)node->procs->addr; + for (j=0; j < node->num_procs; j++) { + proc = procs[j]; /* convenience */ + /* the mapped node includes ALL procs on it, not just those for the + * job to be launched. Hence, check first to see if this proc is + * part of the indicated job - if not, don't include it here + */ + if (proc->name.jobid != job) { + continue; + } + /* pack the vpid for this proc */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &(proc->name.vpid), 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* pack the app_context index */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &proc->app_idx, 1, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* pack the local rank */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &proc->local_rank, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* pack the cpu_list string */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &proc->slot_list, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + /* pack an INVALID vpid as a flag that we are done with procs for this node */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &invalid_vpid, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } } - *data = ndat; return ORTE_SUCCESS; } -int orte_odls_base_default_construct_child_list(orte_gpr_notify_data_t *data, +int orte_odls_base_default_construct_child_list(opal_buffer_t *data, orte_jobid_t *job, orte_std_cntr_t *num_local_procs, orte_vpid_t *vpid_range, @@ 
-511,20 +421,22 @@ int orte_odls_base_default_construct_child_list(orte_gpr_notify_data_t *data, bool *node_included, bool *oversubscribed, bool *override_oversubscribed, - opal_list_t *app_context_list) + orte_std_cntr_t *num_contexts, + orte_app_context_t ***app_contexts) { int rc; - orte_app_context_t *app; - orte_gpr_value_t *value, **values; - orte_vpid_t *vptr; + orte_vpid_t local_rank, num_procs; orte_odls_child_t *child; - orte_odls_app_context_t *app_item; - orte_std_cntr_t j, kv, *sptr, posn; - orte_process_name_t proc; - bool *bptr; + orte_std_cntr_t cnt, j, num_nodes, app_idx; + orte_process_name_t proc, daemon; char *slot_str; + bool node_oversubscribed; - /* parse the returned data to create the required structures + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:constructing child list", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* unpack the returned data to create the required structures * for a fork launch. Since the data will contain information * on procs for ALL nodes, we first have to find the value * struct that contains info for our node. 
@@ -539,151 +451,176 @@ int orte_odls_base_default_construct_child_list(orte_gpr_notify_data_t *data, *oversubscribed = false; *override_oversubscribed = false; - /* first, retrieve the job number we are to launch from the - * returned data - we can extract the jobid directly from the - * subscription name we created - */ - if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(job, data->target))) { + /* unpack the jobid we are to launch */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, job, &cnt, ORTE_JOBID))) { ORTE_ERROR_LOG(rc); return rc; } - opal_output(orte_odls_globals.output, "odls: setting up launch for job %ld", (long)*job); + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:construct_child_list unpacking data to launch job %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job))); - /* setup the routing table for communications - we need to do this - * prior to launch as the procs may want to communicate right away + /* UNPACK JOB-SPECIFIC DATA */ + /* unpack the number of procs in the job - equates to the vpid range */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, vpid_range, &cnt, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* unpack the total slots allocated to us */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, total_slots_alloc, &cnt, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* unpack the override_oversubscribed flag */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, override_oversubscribed, &cnt, OPAL_BOOL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* unpack the number of app_contexts for this job */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, num_contexts, &cnt, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:construct_child_list unpacking %ld app_contexts", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)*num_contexts)); + + /* allocate 
space and unpack the app_contexts for this job - the HNP checked + * that there must be at least one, so don't bother checking here again */ - if (ORTE_SUCCESS != (rc = orte_routed.init_routes(*job, data))) { + *app_contexts = (orte_app_context_t**)malloc(*num_contexts * sizeof(orte_app_context_t*)); + if (NULL == *app_contexts) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, *app_contexts, num_contexts, ORTE_APP_CONTEXT))) { ORTE_ERROR_LOG(rc); return rc; } - /* init the proc object */ + /* UNPACK THE NODE-SPECIFIC DATA */ + /* unpack the number of nodes participating in this launch */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &num_nodes, &cnt, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* setup the proc and daemon names */ proc.jobid = *job; + daemon.jobid = ORTE_PROC_MY_NAME->jobid; - values = (orte_gpr_value_t**)(data->values)->addr; - /* the first value in the data object contains the job-global data, so extract it first */ - value = values[0]; - /* ORTE_JOB_VPID_RANGE_KEY */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[0]->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - *vpid_range = *vptr; - /* ORTE_JOB_TOTAL_SLOTS_ALLOC_KEY */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, value->keyvals[1]->value, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - *total_slots_alloc = *sptr; - /* ORTE_JOB_OVERSUBSCRIBE_OVERRIDE_KEY */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, value->keyvals[2]->value, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - *override_oversubscribed = *bptr; - /* loop through remaining keyvals and get the app_contexts */ - for (kv=3; kv < value->cnt; kv++) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&app, value->keyvals[kv]->value, ORTE_APP_CONTEXT))) { + /* cycle through them */ + for (j=0; j < num_nodes; j++) { + /* unpack the vpid of the 
daemon */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &daemon.vpid, &cnt, ORTE_VPID))) { ORTE_ERROR_LOG(rc); return rc; } - app_item = OBJ_NEW(orte_odls_app_context_t); - if (NULL == app_item) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - app_item->app_context = app; - opal_list_append(app_context_list, &app_item->super); - value->keyvals[kv]->value->data = NULL; /* protect the data storage from later release */ - } - /* parsing of job-global data is complete - now process the node-specific data */ - for (j=1; j < data->cnt; j++) { /* loop across remaining values */ - value = values[j]; - /* this must have be about a node, so extract the daemon's vpid so we can - * see if this is intended for us - */ - /* vpid of daemon is in first position */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[0]->value, ORTE_VPID))) { + /* unpack the number of procs on this node */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &num_procs, &cnt, ORTE_VPID))) { ORTE_ERROR_LOG(rc); return rc; } - /* does this data belong to us? 
*/ - if (ORTE_PROC_MY_NAME->vpid != *vptr) { - /* evidently not - ignore it */ - continue; - } - - /* yes it does - indicate that we need to do something */ - *node_included = true; - - /* harvest the rest of the node-specific data */ - /* 2nd position - num local procs for this job on this node */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, value->keyvals[1]->value, ORTE_STD_CNTR))) { + /* unpack the oversubscribed flag for the node */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &node_oversubscribed, &cnt, OPAL_BOOL))) { ORTE_ERROR_LOG(rc); return rc; } - *num_local_procs = *sptr; /* save the value */ - /* 3rd posn - oversubscribed flag */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, value->keyvals[2]->value, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - *oversubscribed = *bptr; /* save the flag */ - - /* loop through the children for this job and harvest their info */ - posn = 3; - while (posn < value->cnt) { - child = OBJ_NEW(orte_odls_child_t); - /* 1st child posn - vpid */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[posn++]->value, ORTE_VPID))) { + /* cycle through the procs and unpack their data */ + /* unpack the vpid for this proc */ + cnt=1; + while (ORTE_SUCCESS == (rc = opal_dss.unpack(data, &(proc.vpid), &cnt, ORTE_VPID))) { + if (ORTE_VPID_INVALID == proc.vpid) { + /* this flags that all data from this node has been read - there + * will be no further entries for it + */ + break; + } + /* unpack the app_context index */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &app_idx, &cnt, ORTE_STD_CNTR))) { ORTE_ERROR_LOG(rc); return rc; } - proc.vpid = *vptr; - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&child->name, &proc, ORTE_NAME))) { + /* unpack the local rank */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &local_rank, &cnt, ORTE_VPID))) { ORTE_ERROR_LOG(rc); return rc; } - - /* 2nd child posn - app_idx */ - if (ORTE_SUCCESS != (rc = 
orte_dss.get((void**)&sptr, value->keyvals[posn++]->value, ORTE_STD_CNTR))) { + /* unpack the cpu_list string */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &slot_str, &cnt, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } - child->app_idx = *sptr; /* save the index into the app_context objects */ - - /* 3rd child posn - local rank */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[posn++]->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - child->local_rank = *vptr; /* save the local_rank */ - - /* 4th child posn - cpu list */ - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&slot_str, value->keyvals[posn++]->value->data, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != slot_str) { - if (ORTE_SUCCESS != (rc = slot_list_to_cpu_set(slot_str, child))){ + /* does this data belong to us? */ + if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) { + /* yes it does - add this proc to our child list */ + child = OBJ_NEW(orte_odls_child_t); + /* copy the name to preserve it */ + if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, &proc, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + return rc; + } + child->app_idx = app_idx; /* save the index into the app_context objects */ + child->local_rank = local_rank; /* save the local_rank */ + if (NULL != slot_str) { + if (ORTE_SUCCESS != (rc = slot_list_to_cpu_set(slot_str, child))){ + ORTE_ERROR_LOG(rc); + free(slot_str); + return rc; + } + free(slot_str); + } + /* protect operation on the global list of children */ + OPAL_THREAD_LOCK(&orte_odls_globals.mutex); + opal_list_append(&orte_odls_globals.children, &child->super); + opal_condition_signal(&orte_odls_globals.cond); + OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); + /* pass along the local info */ + *num_local_procs = num_procs; + *oversubscribed = node_oversubscribed; + /* set the routing info to be direct - we need to do this + * prior to launch as the procs may want to communicate right away + */ + if 
(ORTE_SUCCESS != (rc = orte_routed.update_route(&proc, &proc))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } else { + /* set the routing info through the other daemon - we need to do this + * prior to launch as the procs may want to communicate right away + */ + if (ORTE_SUCCESS != (rc = orte_routed.update_route(&proc, &daemon))) { ORTE_ERROR_LOG(rc); - free(slot_str); return rc; } - free(slot_str); } - - /* protect operation on the global list of children */ - OPAL_THREAD_LOCK(&orte_odls_globals.mutex); - opal_list_append(&orte_odls_globals.children, &child->super); - opal_condition_signal(&orte_odls_globals.cond); - OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); + } + if (ORTE_SUCCESS != rc && ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + ORTE_ERROR_LOG(rc); + } + + /* do we have any launching to do? */ + if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) { + /* pass that along for later */ + *node_included = true; } } - + return ORTE_SUCCESS; } @@ -696,7 +633,6 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, int rc; int i; char *param, *param2; - char *uri; /* check the system limits - if we are at our max allowed children, then * we won't be allowed to do this anyway, so we may as well abort now. 
@@ -713,14 +649,14 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, } /* Try to change to the context cwd and check that the app - exists and is executable The resource manager functions will + exists and is executable The function will take care of outputting a pretty error message, if required */ - if (ORTE_SUCCESS != (rc = orte_rmgr.check_context_cwd(context, true))) { + if (ORTE_SUCCESS != (rc = orte_util_check_context_cwd(context, true))) { /* do not ERROR_LOG - it will be reported elsewhere */ return rc; } - if (ORTE_SUCCESS != (rc = orte_rmgr.check_context_app(context))) { + if (ORTE_SUCCESS != (rc = orte_util_check_context_app(context))) { /* do not ERROR_LOG - it will be reported elsewhere */ return rc; } @@ -764,17 +700,21 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, } /* pass my contact info to the local proc so we can talk */ - uri = orte_rml.get_contact_info(); param = mca_base_param_environ_variable("orte","local_daemon","uri"); - opal_setenv(param, uri, true, environ_copy); + opal_setenv(param, orte_process_info.my_daemon_uri, true, environ_copy); free(param); - free(uri); - /* pass a nodeid to the proc - for now, set this to our vpid as - * this is a globally unique number and we have a one-to-one - * mapping of daemons to nodes + /* pass the hnp's contact info to the local proc in case it + * needs it */ - if (ORTE_SUCCESS != (rc = orte_ns.convert_nodeid_to_string(¶m2, (orte_nodeid_t)ORTE_PROC_MY_NAME->vpid))) { + param = mca_base_param_environ_variable("orte","hnp","uri"); + opal_setenv(param, orte_process_info.my_hnp_uri, true, environ_copy); + free(param); + + /* pass our vpid to the process as a "nodeid" so it can + * identify which procs are local to it + */ + if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(¶m2, ORTE_PROC_MY_NAME->vpid))) { ORTE_ERROR_LOG(rc); return rc; } @@ -799,39 +739,6 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, } free(param); - /* 
setup universe info */ - if (NULL != orte_universe_info.name) { - param = mca_base_param_environ_variable("universe", NULL, NULL); - asprintf(&uri, "%s@%s:%s", orte_universe_info.uid, - orte_universe_info.host, - orte_universe_info.name); - opal_setenv(param, uri, true, environ_copy); - free(param); - free(uri); - } - - /* setup ns contact info */ - if(NULL != orte_process_info.ns_replica_uri) { - uri = strdup(orte_process_info.ns_replica_uri); - } else { - uri = orte_rml.get_contact_info(); - } - param = mca_base_param_environ_variable("ns","replica","uri"); - opal_setenv(param, uri, true, environ_copy); - free(param); - free(uri); - - /* setup gpr contact info */ - if(NULL != orte_process_info.gpr_replica_uri) { - uri = strdup(orte_process_info.gpr_replica_uri); - } else { - uri = orte_rml.get_contact_info(); - } - param = mca_base_param_environ_variable("gpr","replica","uri"); - opal_setenv(param, uri, true, environ_copy); - free(param); - free(uri); - /* set the app_context number into the environment */ param = mca_base_param_environ_variable("orte","app","num"); asprintf(¶m2, "%ld", (long)context->idx); @@ -845,7 +752,7 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, opal_setenv(param, param2, true, environ_copy); free(param); free(param2); - + /* use same nodename as the starting daemon (us) */ param = mca_base_param_environ_variable("orte", "base", "nodename"); opal_setenv(param, orte_system_info.nodename, true, environ_copy); @@ -854,14 +761,81 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, /* push data into environment - don't push any single proc * info, though. We are setting the environment up on a * per-context basis, and will add the individual proc - * info later + * info later. This also sets the mca param to select + * the "env" component in the SDS framework. 
*/ - orte_ns_nds_env_put(vpid_range, num_local_procs, environ_copy); + orte_ess_env_put(vpid_range, num_local_procs, environ_copy); return ORTE_SUCCESS; } -int orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_context_list, +static int pack_state_for_proc(opal_buffer_t *alert, bool pack_pid, orte_odls_child_t *child) +{ + int rc; + + /* pack the child's vpid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &(child->name->vpid), 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* pack its pid if we need to report it */ + if (pack_pid) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &child->pid, 1, OPAL_PID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + /* pack its state */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &child->state, 1, ORTE_PROC_STATE))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* pack its exit code */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &child->exit_code, 1, ORTE_EXIT_CODE))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + return ORTE_SUCCESS; +} + +static int pack_state_update(opal_buffer_t *alert, bool pack_pid, orte_jobid_t job) +{ + int rc; + opal_list_item_t *item; + orte_odls_child_t *child; + orte_vpid_t null=ORTE_VPID_INVALID; + + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + for (item = opal_list_get_first(&orte_odls_globals.children); + item != opal_list_get_end(&orte_odls_globals.children); + item = opal_list_get_next(item)) { + child = (orte_odls_child_t*)item; + /* if this child is part of the job... 
*/ + if (child->name->jobid == job) { + if (ORTE_SUCCESS != (rc = pack_state_for_proc(alert, pack_pid, child))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + } + /* flag that this job is complete so the receiver can know */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &null, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + return ORTE_SUCCESS; +} + + +int orte_odls_base_default_launch_local(orte_jobid_t job, + orte_std_cntr_t num_apps, + orte_app_context_t **apps, orte_std_cntr_t num_local_procs, orte_vpid_t vpid_range, orte_std_cntr_t total_slots_alloc, @@ -869,57 +843,20 @@ int orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_conte bool override_oversubscribed, orte_odls_base_fork_local_proc_fn_t fork_local) { - char *job_str, *vpid_str, *param, *value, *session_dir, *uri_file, *my_uri; - FILE *fp; - opal_list_item_t *item, *item2; + char *job_str, *vpid_str, *param, *value; + opal_list_item_t *item; orte_app_context_t *app; - orte_odls_app_context_t *app_item; orte_odls_child_t *child; - int proc_rank, num_processors; - bool want_processor, oversubscribed, quit_flag; - int rc; + int i, num_processors; + bool want_processor, oversubscribed; + int rc, ret; bool launch_failed=true; + opal_buffer_t alert; + orte_std_cntr_t proc_rank; /* protect operations involving the global list of children */ OPAL_THREAD_LOCK(&orte_odls_globals.mutex); - /* record my uri in a file within the session directory so the local proc - * can contact me - */ - opal_output(orte_odls_globals.output, "odls: dropping local uri file"); - - /* put the file in the job session dir for the job being launched */ - orte_ns.convert_jobid_to_string(&job_str, job); - if (ORTE_SUCCESS != (rc = orte_session_dir(true, NULL, NULL, NULL, - NULL, NULL, job_str, NULL))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - - /* get the session dir name so we can put the file there */ - session_dir = NULL; /* init the value so session_dir_get_name doesn't attempt to free it */ - 
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&session_dir, NULL, NULL, NULL, - NULL, NULL, NULL, job_str, NULL))) { - ORTE_ERROR_LOG(rc); - free(job_str); - goto CLEANUP; - } - free(job_str); - - /* create the file and put my uri into it */ - uri_file = opal_os_path(false, session_dir, "orted-uri.txt", NULL); - fp = fopen(uri_file, "w"); - if (NULL == fp) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - rc = ORTE_ERR_FILE_OPEN_FAILURE; - goto CLEANUP; - } - my_uri = orte_rml.get_contact_info(); - fprintf(fp, "%s\n", my_uri); - fclose(fp); - free(uri_file); - free(my_uri); - #if OPAL_ENABLE_FT == 1 /* * Notify the local SnapC component regarding new job @@ -931,16 +868,14 @@ int orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_conte #endif /* Now we preload any files that are needed. This is done on a per - * app context basis */ - for (item = opal_list_get_first(app_context_list); - item != opal_list_get_end(app_context_list); - item = opal_list_get_next(item)) { - app_item = (orte_odls_app_context_t*)item; - if(app_item->app_context->preload_binary || - NULL != app_item->app_context->preload_files) { - if( ORTE_SUCCESS != (rc = orte_odls_base_preload_files_app_context(app_item->app_context)) ) { + * app context basis + */ + for (i=0; i < num_apps; i++) { + if(apps[i]->preload_binary || + NULL != apps[i]->preload_files) { + if( ORTE_SUCCESS != (rc = orte_odls_base_preload_files_app_context(apps[i])) ) { ORTE_ERROR_LOG(rc); - goto CLEANUP; + goto unlock; } } } @@ -958,11 +893,16 @@ int orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_conte * settings */ want_processor = false; /* default to not being a hog */ - opal_output(orte_odls_globals.output, - "odls: could not get number of processors - using conservative settings"); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:launch could not get number of processors - using conservative settings", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + } else { - 
opal_output(orte_odls_globals.output, - "odls: got %ld processors", (long)num_processors); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:launch got %ld processors", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)num_processors)); /* only do this if we can actually get info on the number of processors */ if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) { @@ -977,7 +917,11 @@ int orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_conte * to check it against the local number of processors to ensure we don't overload them */ if (override_oversubscribed) { - opal_output(orte_odls_globals.output, "odls: overriding oversubscription"); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:launch overriding oversubscription", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) { /* if the #procs > #processors, declare us oversubscribed regardless * of what the mapper claimed - the user may have told us something @@ -994,20 +938,29 @@ int orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_conte } } } - opal_output(orte_odls_globals.output, "odls: oversubscribed set to %s want_processor set to %s", - oversubscribed ? "true" : "false", want_processor ? "true" : "false"); + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:launch oversubscribed set to %s want_processor set to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + oversubscribed ? "true" : "false", want_processor ? 
"true" : "false")); + + /* setup to report the proc state to the HNP */ + OBJ_CONSTRUCT(&alert, opal_buffer_t); + /* setup the environment for each context */ - for (item2 = opal_list_get_first(app_context_list); - item2 != opal_list_get_end(app_context_list); - item2 = opal_list_get_next(item2)) { - app_item = (orte_odls_app_context_t*)item2; - if (ORTE_SUCCESS != (rc = odls_base_default_setup_fork(app_item->app_context, + for (i=0; i < num_apps; i++) { + if (ORTE_SUCCESS != (rc = odls_base_default_setup_fork(apps[i], num_local_procs, vpid_range, total_slots_alloc, oversubscribed, - &app_item->environ_copy))) { + &apps[i]->env))) { + + OPAL_OUTPUT_VERBOSE((10, orte_odls_globals.output, + "%s odls:launch:setup_fork failed with error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_ERROR_NAME(rc))); + /* do not ERROR_LOG this failure - it will be reported * elsewhere. The launch is going to fail - find at least one child * in this job and mark it as failed-to-start @@ -1016,8 +969,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_conte item != opal_list_get_end(&orte_odls_globals.children); item = opal_list_get_next(item)) { child = (orte_odls_child_t*)item; - if (ORTE_EQUAL == orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { - child->state = ORTE_PROC_STATE_FAILED_TO_START; + if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { child->exit_code = rc; goto CLEANUP; } @@ -1027,9 +979,8 @@ int orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_conte /* okay, now let's launch our local procs using the provided fork_local fn */ - quit_flag = false; for (proc_rank = 0, item = opal_list_get_first(&orte_odls_globals.children); - !quit_flag && item != opal_list_get_end(&orte_odls_globals.children); + item != opal_list_get_end(&orte_odls_globals.children); item = opal_list_get_next(item)) { child = (orte_odls_child_t*)item; @@ -1038,8 +989,12 @@ int 
orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_conte * If it has been launched, then do nothing */ if (child->alive) { - opal_output(orte_odls_globals.output, "odls: child %s is already alive", - ORTE_NAME_PRINT(child->name)); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:launch child %s is already alive", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); + continue; } @@ -1047,67 +1002,72 @@ int orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_conte * job could be given as a WILDCARD value, we must use * the dss.compare function to check for equality. */ - if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { - opal_output(orte_odls_globals.output, "odls: child %s is not in job %ld being launched", - ORTE_NAME_PRINT(child->name), (long)job); + if (OPAL_EQUAL != opal_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:launch child %s is not in job %s being launched", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name), + ORTE_JOBID_PRINT(job))); + continue; } - - opal_output(orte_odls_globals.output, "odls: preparing to launch child %s", - ORTE_NAME_PRINT(child->name)); - + /* find the app context for this child */ - for (item2 = opal_list_get_first(app_context_list); - item2 != opal_list_get_end(app_context_list); - item2 = opal_list_get_next(item2)) { - app_item = (orte_odls_app_context_t*)item2; - if (child->app_idx == app_item->app_context->idx) { - /* found it */ - app = app_item->app_context; - goto DOFORK; - } + if (child->app_idx > num_apps || + NULL == apps[child->app_idx]) { + /* get here if we couldn't find the app_context */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rc = ORTE_ERR_NOT_FOUND; + goto CLEANUP; } - /* get here if we couldn't find the app_context */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; - goto CLEANUP; + app = 
apps[child->app_idx]; -DOFORK: /* setup the rest of the environment with the proc-specific items - these * will be overwritten for each child */ - if (ORTE_SUCCESS != (rc = orte_ns.get_jobid_string(&job_str, child->name))) { + if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&job_str, child->name->jobid))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } - if (ORTE_SUCCESS != (rc = orte_ns.get_vpid_string(&vpid_str, child->name))) { + if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid_str, child->name->vpid))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } - if(NULL == (param = mca_base_param_environ_variable("ns","nds","jobid"))) { + if(NULL == (param = mca_base_param_environ_variable("orte","ess","jobid"))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); rc = ORTE_ERR_OUT_OF_RESOURCE; goto CLEANUP; } - opal_setenv(param, job_str, true, &app_item->environ_copy); + opal_setenv(param, job_str, true, &app->env); free(param); free(job_str); - if(NULL == (param = mca_base_param_environ_variable("ns","nds","vpid"))) { + if(NULL == (param = mca_base_param_environ_variable("orte","ess","vpid"))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); rc = ORTE_ERR_OUT_OF_RESOURCE; goto CLEANUP; } - opal_setenv(param, vpid_str, true, &app_item->environ_copy); + opal_setenv(param, vpid_str, true, &app->env); free(param); - free(vpid_str); + /* although the vpid IS the process' rank within the job, users + * would appreciate being given a public environmental variable + * that also represents this value - something MPI specific - so + * do that here. + * + * AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT. 
+ * We know - just live with it + */ + opal_setenv("MPI_COMM_WORLD_RANK", vpid_str, true, &app->env); + free(vpid_str); /* done with this now */ + asprintf(&value, "%lu", (unsigned long) child->local_rank); - if(NULL == (param = mca_base_param_environ_variable("ns","nds","local_rank"))) { + if(NULL == (param = mca_base_param_environ_variable("orte","ess","local_rank"))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); rc = ORTE_ERR_OUT_OF_RESOURCE; goto CLEANUP; } - opal_setenv(param, value, true, &app_item->environ_copy); + opal_setenv(param, value, true, &app->env); free(param); free(value); @@ -1115,13 +1075,13 @@ DOFORK: param = mca_base_param_environ_variable("mpi", NULL, "paffinity_processor"); asprintf(&value, "%lu", (unsigned long) proc_rank); - opal_setenv(param, value, true, &app_item->environ_copy); + opal_setenv(param, value, true, &app->env); free(param); free(value); } else { param = mca_base_param_environ_variable("mpi", NULL, "paffinity_processor"); - opal_unsetenv(param, &app_item->environ_copy); + opal_unsetenv(param, &app->env); free(param); } @@ -1147,50 +1107,76 @@ DOFORK: &(app->app), &(app->cwd), &(app->argv), - &(app_item->environ_copy) ) ) ) { + &(app->env) ) ) ) { ORTE_ERROR_LOG(rc); goto CLEANUP; } } #endif #endif + if (5 < opal_output_get_verbosity(orte_odls_globals.output)) { + opal_output(orte_odls_globals.output, "%s odls:launch: spawning child %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name)); + + /* dump what is going to be exec'd */ + if (7 < opal_output_get_verbosity(orte_odls_globals.output)) { + opal_dss.dump(orte_odls_globals.output, app, ORTE_APP_CONTEXT); + } + } - if (ORTE_SUCCESS != (rc = fork_local(app, child, app_item->environ_copy))) { + rc = fork_local(app, child, app->env); + /* reaquire lock so we don't double unlock... 
*/ + OPAL_THREAD_LOCK(&orte_odls_globals.mutex); + if (ORTE_SUCCESS != rc) { /* do NOT ERROR_LOG this error - it generates * a message/node as most errors will be common * across the entire cluster. Instead, we let orterun * output a consolidated error message for us */ - quit_flag = true; + goto CLEANUP; + } else { + child->alive = true; + child->state = ORTE_PROC_STATE_LAUNCHED; } - /* reaquire lock so we don't double unlock... */ - OPAL_THREAD_LOCK(&orte_odls_globals.mutex); /* move to next processor */ proc_rank++; } launch_failed = false; CLEANUP: - /* report the proc info and state in the registry */ - if (ORTE_SUCCESS != (rc = orte_odls_base_report_spawn(&orte_odls_globals.children))) { - ORTE_ERROR_LOG(rc); - opal_condition_signal(&orte_odls_globals.cond); - OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); - return rc; + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:launch reporting job %s launch status", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(job))); + /* pack the launch results */ + if (ORTE_SUCCESS != (ret = pack_state_update(&alert, true, job))) { + ORTE_ERROR_LOG(ret); } + /* send the update */ + if (0 > (ret = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &alert, ORTE_RML_TAG_APP_LAUNCH_CALLBACK, 0))) { + ORTE_ERROR_LOG(ret); + } + OBJ_DESTRUCT(&alert); + +unlock: if (!launch_failed) { - /* setup the waitpids on the children */ + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:launch setting waitpids", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* if the launch didn't fail, setup the waitpids on the children */ for (item = opal_list_get_first(&orte_odls_globals.children); item != opal_list_get_end(&orte_odls_globals.children); item = opal_list_get_next(item)) { child = (orte_odls_child_t*)item; if (ORTE_PROC_STATE_LAUNCHED == child->state) { + child->state = ORTE_PROC_STATE_RUNNING; OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); orte_wait_cb(child->pid, odls_base_default_wait_local_proc, NULL); 
OPAL_THREAD_LOCK(&orte_odls_globals.mutex); - child->state = ORTE_PROC_STATE_RUNNING; } } } @@ -1200,47 +1186,7 @@ CLEANUP: return rc; } -int orte_odls_base_default_extract_proc_map_info(orte_process_name_t *daemon, - opal_list_t *proc_list, - orte_gpr_value_t *value) -{ - int rc; - orte_vpid_t *vptr; - orte_process_name_t name; - orte_std_cntr_t posn; - orte_namelist_t *proc; - - /* daemon jobid is set by caller */ - /* vpid of daemon that will host these procs is in first position */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[0]->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - daemon->vpid = *vptr; - - /* loop through the children for this job and harvest their info */ - posn = 3; - name.jobid = ORTE_JOBID_INVALID; /* must be reset by caller */ - while (posn < value->cnt) { - proc = OBJ_NEW(orte_namelist_t); - /* 1st child posn - vpid */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[posn]->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - name.vpid = *vptr; - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&proc->name, &name, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; - } - opal_list_append(proc_list, &proc->item); - posn += 4; - } - - return ORTE_SUCCESS; -} - -int orte_odls_base_default_deliver_message(orte_jobid_t job, orte_buffer_t *buffer, orte_rml_tag_t tag) +int orte_odls_base_default_deliver_message(orte_jobid_t job, opal_buffer_t *buffer, orte_rml_tag_t tag) { int rc; opal_list_item_t *item; @@ -1259,11 +1205,14 @@ int orte_odls_base_default_deliver_message(orte_jobid_t job, orte_buffer_t *buff * the dss.compare function to check for equality. 
*/ if (!child->alive || - ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { + OPAL_EQUAL != opal_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { continue; } - opal_output(orte_odls_globals.output, "odls: sending message to tag %lu on child %s", - (unsigned long)tag, ORTE_NAME_PRINT(child->name)); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls: sending message to tag %lu on child %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (unsigned long)tag, ORTE_NAME_PRINT(child->name))); /* if so, send the message */ rc = orte_rml.send_buffer(child->name, buffer, tag, 0); @@ -1293,8 +1242,8 @@ int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, i opal_list_item_t *item; orte_odls_child_t *child; - OPAL_OUTPUT_VERBOSE((1, orte_odls_globals.output, - "%s signaling proc %s", + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls: signaling proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == proc) ? "NULL" : ORTE_NAME_PRINT(proc))); @@ -1324,7 +1273,7 @@ int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, i item != opal_list_get_end(&orte_odls_globals.children); item = opal_list_get_next(item)) { child = (orte_odls_child_t*)item; - if (ORTE_EQUAL == orte_dss.compare(&(child->name), (orte_process_name_t*)proc, ORTE_NAME)) { + if (OPAL_EQUAL == opal_dss.compare(&(child->name), (orte_process_name_t*)proc, ORTE_NAME)) { /* unlock before signaling as this may generate a callback */ opal_condition_signal(&orte_odls_globals.cond); OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); @@ -1344,12 +1293,67 @@ int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, i return ORTE_ERR_NOT_FOUND; } -int orte_odls_base_default_require_sync(orte_process_name_t *proc) +static bool all_children_registered(orte_jobid_t job) { - orte_buffer_t buffer; + opal_list_item_t *item; + orte_odls_child_t *child; + + /* the thread is locked elsewhere - don't try to do it again here 
*/ + + for (item = opal_list_get_first(&orte_odls_globals.children); + item != opal_list_get_end(&orte_odls_globals.children); + item = opal_list_get_next(item)) { + child = (orte_odls_child_t*)item; + + /* is this child part of the specified job? */ + if (OPAL_EQUAL == opal_dss.compare(&child->name->jobid, &job, ORTE_JOBID)) { + /* if this child is *not* registered yet, return false */ + if (NULL == child->rml_uri) { + return false; + } + } + } + + /* if we get here, then everyone in the job is registered */ + return true; + +} + +static int pack_child_contact_info(orte_jobid_t job, opal_buffer_t *buf) +{ + opal_list_item_t *item; + orte_odls_child_t *child; + int rc; + + /* the thread is locked elsewhere - don't try to do it again here */ + + for (item = opal_list_get_first(&orte_odls_globals.children); + item != opal_list_get_end(&orte_odls_globals.children); + item = opal_list_get_next(item)) { + child = (orte_odls_child_t*)item; + + /* is this child part of the specified job? */ + if (OPAL_EQUAL == opal_dss.compare(&child->name->jobid, &job, ORTE_JOBID)) { + /* pack the contact info */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &child->rml_uri, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + } + + return ORTE_SUCCESS; + +} + + +int orte_odls_base_default_require_sync(orte_process_name_t *proc, opal_buffer_t *buf) +{ + opal_buffer_t buffer; opal_list_item_t *item; orte_odls_child_t *child; int8_t dummy; + orte_std_cntr_t cnt; int rc; bool found=false; @@ -1362,11 +1366,13 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc) child = (orte_odls_child_t*)item; /* find this child */ - if (ORTE_EQUAL == orte_dss.compare(proc, child->name, ORTE_NAME)) { - opal_output(orte_odls_globals.output, "odls: registering sync on child %s", - ORTE_NAME_PRINT(child->name)); + if (OPAL_EQUAL == opal_dss.compare(proc, child->name, ORTE_NAME)) { + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls: registering sync on child %s", + 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); - child->sync_required = !child->sync_required; found = true; break; } @@ -1377,23 +1383,42 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc) */ if (!found) { child = OBJ_NEW(orte_odls_child_t); - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&child->name, proc, ORTE_NAME))) { + if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, proc, ORTE_NAME))) { ORTE_ERROR_LOG(rc); return rc; } opal_list_append(&orte_odls_globals.children, &child->super); /* we don't know any other info about the child, so just indicate it's - * alive and set the sync + * alive */ child->alive = true; - child->sync_required = !child->sync_required; + } + + /* if the contact info is already set, then we are "de-registering" the child + * so free the info and set it to NULL + */ + if (NULL != child->rml_uri) { + free(child->rml_uri); + child->rml_uri = NULL; + } else { + /* if the contact info is not set, then we are registering the child so + * unpack the contact info from the buffer and store it + */ + cnt = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &(child->rml_uri), &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + } } /* ack the call */ - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - orte_dss.pack(&buffer, &dummy, 1, ORTE_INT8); /* put anything in */ - opal_output(orte_odls_globals.output, "odls: sending sync ack to child %s", - ORTE_NAME_PRINT(proc)); + OBJ_CONSTRUCT(&buffer, opal_buffer_t); + opal_dss.pack(&buffer, &dummy, 1, OPAL_INT8); /* put anything in */ + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls: sending sync ack to child %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc))); + if (0 > (rc = orte_rml.send_buffer(proc, &buffer, ORTE_RML_TAG_SYNC, 0))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&buffer); @@ -1401,11 +1426,68 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc) } OBJ_DESTRUCT(&buffer); + /* now check to see if everyone in 
this job has registered */ + if (all_children_registered(proc->jobid)) { + /* once everyone registers, send their contact info to + * the HNP so it is available to debuggers and anyone + * else that needs it + */ + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls: sending contact info to HNP", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + OBJ_CONSTRUCT(&buffer, opal_buffer_t); + /* store jobid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &proc->jobid, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buffer); + goto CLEANUP; + } + /* add in contact info for all procs in the job */ + if (ORTE_SUCCESS != (rc = pack_child_contact_info(proc->jobid, &buffer))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buffer); + goto CLEANUP; + } + /* now send it to the HNP */ + if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &buffer, ORTE_RML_TAG_INIT_ROUTES, 0))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buffer); + goto CLEANUP; + } + OBJ_DESTRUCT(&buffer); + } + +CLEANUP: opal_condition_signal(&orte_odls_globals.cond); OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); return ORTE_SUCCESS; } +static bool any_live_children(orte_jobid_t job) +{ + opal_list_item_t *item; + orte_odls_child_t *child; + + /* the thread is locked elsewhere - don't try to do it again here */ + + for (item = opal_list_get_first(&orte_odls_globals.children); + item != opal_list_get_end(&orte_odls_globals.children); + item = opal_list_get_next(item)) { + child = (orte_odls_child_t*)item; + + /* is this child part of the specified job? */ + if (OPAL_EQUAL == opal_dss.compare(&child->name->jobid, &job, ORTE_JOBID) && + child->alive) { + return true; + } + } + + /* if we get here, then nobody is left alive from that job */ + return false; + +} /* * Wait for a callback indicating the child has completed. 
@@ -1415,12 +1497,17 @@ void odls_base_default_wait_local_proc(pid_t pid, int status, void* cbdata) { orte_odls_child_t *child; opal_list_item_t *item; - bool aborted; + bool aborted=false; char *job, *vpid, *abort_file; struct stat buf; int rc; + opal_buffer_t alert; + orte_plm_cmd_flag_t cmd=ORTE_PLM_UPDATE_PROC_STATE; - opal_output(orte_odls_globals.output, "odls: child process %ld terminated", (long)pid); + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc child process %ld terminated", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (long)pid)); /* since we are going to be working with the global list of * children, we need to protect that list from modification @@ -1434,7 +1521,8 @@ void odls_base_default_wait_local_proc(pid_t pid, int status, void* cbdata) item != opal_list_get_end(&orte_odls_globals.children); item = opal_list_get_next(item)) { child = (orte_odls_child_t*)item; - if (child->alive && pid == child->pid) { /* found it */ + + if (pid == child->pid) { /* found it */ goto GOTCHILD; } } @@ -1442,13 +1530,32 @@ void odls_base_default_wait_local_proc(pid_t pid, int status, void* cbdata) * is already dead. 
If the latter, then we have a problem as it * means we are detecting it exiting multiple times */ - opal_output(orte_odls_globals.output, "odls: did not find pid %ld in table!", (long) pid); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc did not find pid %ld in table!", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (long)pid)); + + /* it's just a race condition - don't error log it */ opal_condition_signal(&orte_odls_globals.cond); OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); return; GOTCHILD: + /* if the child was previously flagged as dead, then just + * ensure that its exit state gets reported to avoid hanging + */ + if (!child->alive) { + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc child %s was already dead", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); + goto MOVEON; + } + + /* save the exit code */ + child->exit_code = status; + /* If this child was the (vpid==0), we hooked it up to orterun's STDIN SOURCE earlier (do not change this without also changing odsl_default_fork_local_proc()). So we have to tell the SOURCE @@ -1462,9 +1569,13 @@ GOTCHILD: *have* to do this unpublish here, even if it arrives after an exception is detected and handled (in which case this unpublish request will be ignored/discarded. */ - opal_output(orte_odls_globals.output, - "odls: pid %ld corresponds to %s\n", - (long) pid, ORTE_NAME_PRINT(child->name)); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc pid %ld corresponds to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (long)pid, + ORTE_NAME_PRINT(child->name))); + if (0 == child->name->vpid) { rc = orte_iof.iof_unpublish(child->name, ORTE_NS_CMP_ALL, ORTE_IOF_STDIN); @@ -1473,17 +1584,19 @@ GOTCHILD: /* We can't really abort, so keep going... 
*/ } } - opal_output(orte_odls_globals.output, "orted sent IOF unpub message!\n"); - #if 0 + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc orted sent IOF unpub message!", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + +#if 0 /* Note that the svc IOF component will detect an exception on the oob because we're shutting it down, so it will take care of closing down any streams that it has open to us. */ orte_iof.iof_flush(); - #endif +#endif /* determine the state of this process */ - aborted = false; if(WIFEXITED(status)) { /* even though the process exited "normally", it is quite * possible that this happened via an orte_abort call - in @@ -1495,56 +1608,81 @@ GOTCHILD: * of an "abort" file in this process' session directory. If * we find it, then we know that this was an abnormal termination. */ - if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&job, child->name->jobid))) { + if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&job, child->name->jobid))) { ORTE_ERROR_LOG(rc); goto MOVEON; } - if (ORTE_SUCCESS != (rc = orte_ns.convert_vpid_to_string(&vpid, child->name->vpid))) { + if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid, child->name->vpid))) { ORTE_ERROR_LOG(rc); free(job); goto MOVEON; } - abort_file = opal_os_path(false, orte_process_info.universe_session_dir, + abort_file = opal_os_path(false, orte_process_info.tmpdir_base, + orte_process_info.top_session_dir, job, vpid, "abort", NULL ); + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc checking abort file %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), abort_file)); + free(job); free(vpid); if (0 == stat(abort_file, &buf)) { /* the abort file must exist - there is nothing in it we need. 
It's * meer existence indicates that an abnormal termination occurred */ - opal_output(orte_odls_globals.output, "odls: child %s died by abort", - ORTE_NAME_PRINT(child->name)); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc child %s died by abort", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); + aborted = true; + child->state = ORTE_PROC_STATE_ABORTED; free(abort_file); } else { /* okay, it terminated normally - check to see if a sync was required and * if it was received */ - if (child->sync_required) { + if (NULL != child->rml_uri) { /* if this is set, then we required a sync and didn't get it, so this * is considered an abnormal termination and treated accordingly */ aborted = true; - opal_output(orte_odls_globals.output, "odls: child process %s terminated normally " - "but did not provide a required sync - it " - "will be treated as an abnormal termination", - ORTE_NAME_PRINT(child->name)); + child->state = ORTE_PROC_STATE_ABORTED; + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc child process %s terminated normally " + "but did not provide a required sync - it " + "will be treated as an abnormal termination", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); + goto MOVEON; + } else { + child->state = ORTE_PROC_STATE_TERMINATED; } - opal_output(orte_odls_globals.output, "odls: child process %s terminated normally", - ORTE_NAME_PRINT(child->name)); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc child process %s terminated normally", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); + } } else { /* the process was terminated with a signal! 
That's definitely * abnormal, so indicate that condition */ - opal_output(orte_odls_globals.output, "odls: child process %s terminated with signal", - ORTE_NAME_PRINT(child->name)); + child->state = ORTE_PROC_STATE_ABORTED_BY_SIG; + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc child process %s terminated with signal", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); + aborted = true; } MOVEON: - /* set this proc to "not alive" */ + /* indicate the child is no longer alive */ child->alive = false; /* Clean up the session directory as if we were the process @@ -1553,22 +1691,73 @@ MOVEON: */ orte_session_dir_finalize(child->name); - /* set the proc state in the child structure */ + /* setup the alert buffer */ + OBJ_CONSTRUCT(&alert, opal_buffer_t); + + /* if the proc aborted, tell the HNP right away */ if (aborted) { - child->state = ORTE_PROC_STATE_ABORTED; + /* pack update state command */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&alert, &cmd, 1, ORTE_PLM_CMD))) { + ORTE_ERROR_LOG(rc); + goto unlock; + } + /* pack only the data for this proc - have to start with the jobid + * so the receiver can unpack it correctly + */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&alert, &child->name->jobid, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + goto unlock; + } + /* now pack the child's info */ + if (ORTE_SUCCESS != (rc = pack_state_for_proc(&alert, false, child))) { + ORTE_ERROR_LOG(rc); + goto unlock; + } + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc reporting proc %s aborted to HNP", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); + + /* send it */ + if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &alert, ORTE_RML_TAG_PLM, 0))) { + ORTE_ERROR_LOG(rc); + goto unlock; + } } else { - child->state = ORTE_PROC_STATE_TERMINATED; + /* since it didn't abort, let's see if all of that job's procs are done */ + if (!any_live_children(child->name->jobid)) { + /* all 
those children are dead - alert the HNP */ + /* pack update state command */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&alert, &cmd, 1, ORTE_PLM_CMD))) { + ORTE_ERROR_LOG(rc); + goto unlock; + } + /* pack the data for the job */ + if (ORTE_SUCCESS != (rc = pack_state_update(&alert, false, child->name->jobid))) { + ORTE_ERROR_LOG(rc); + goto unlock; + } + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:wait_local_proc reporting all procs in %s terminated", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(child->name->jobid))); + + /* send it */ + if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &alert, ORTE_RML_TAG_PLM, 0))) { + ORTE_ERROR_LOG(rc); + goto unlock; + } + } } - /* Need to unlock before we call set_proc_state as this is going to generate - * a trigger that will eventually callback to us - */ +unlock: + OBJ_DESTRUCT(&alert); + opal_condition_signal(&orte_odls_globals.cond); OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); - if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(child->name, child->state, status))) { - ORTE_ERROR_LOG(rc); - } } int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state, @@ -1577,14 +1766,19 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state, { orte_odls_child_t *child; opal_list_item_t *item, *next; - int rc = 0, exit_status = 0, err; + int rc = ORTE_SUCCESS, exit_status = 0, err; opal_list_t procs_killed; - orte_namelist_t *proc; - + opal_buffer_t alert; + orte_plm_cmd_flag_t cmd=ORTE_PLM_UPDATE_PROC_STATE; + orte_vpid_t null=ORTE_VPID_INVALID; + orte_jobid_t last_job; + OBJ_CONSTRUCT(&procs_killed, opal_list_t); - opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: working on job %ld", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)job); + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:kill_local_proc working on job %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(job))); /* since we are going to be working with the 
global list of * children, we need to protect that list from modification @@ -1592,6 +1786,19 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state, */ OPAL_THREAD_LOCK(&orte_odls_globals.mutex); + /* setup the alert buffer - we will utilize the fact that + * children are stored on the list in job order. In other words, + * the children from one job are stored in sequence on the + * list + */ + OBJ_CONSTRUCT(&alert, opal_buffer_t); + /* pack update state command */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&alert, &cmd, 1, ORTE_PLM_CMD))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + last_job = ORTE_JOBID_INVALID; + for (item = opal_list_get_first(&orte_odls_globals.children); item != opal_list_get_end(&orte_odls_globals.children); item = next) { @@ -1600,33 +1807,68 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state, /* preserve the pointer to the next item in list in case we release it */ next = opal_list_get_next(item); - opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: checking child process %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:kill_local_proc checking child process %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); /* do we have a child from the specified job? Because the * job could be given as a WILDCARD value, we must use * the dss.compare function to check for equality. 
*/ - if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { + if (OPAL_EQUAL != opal_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:kill_local_proc child %s is not part of job %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name), + ORTE_JOBID_PRINT(job))); + continue; } /* remove the child from the list since it is either already dead or soon going to be dead */ opal_list_remove_item(&orte_odls_globals.children, item); + /* store the jobid, if required */ + if (last_job != child->name->jobid) { + /* if it isn't the first time through, pack a job_end flag so the + * receiver can correctly process the buffer + */ + if (ORTE_JOBID_INVALID != last_job) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(&alert, &null, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&alert, &(child->name->jobid), 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + last_job = child->name->jobid; + } + /* is this process alive? 
if not, then nothing for us * to do to it */ if (!child->alive) { - opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: child %s is not alive", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)); + + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:kill_local_proc child %s is not alive", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(child->name))); + /* ensure, though, that the state is terminated so we don't lockup if * the proc never started */ - continue; + goto RECORD; } - /* de-register the SIGCHILD callback for this pid */ + /* de-register the SIGCHILD callback for this pid so we don't get + * multiple alerts sent back to the HNP + */ if (ORTE_SUCCESS != (rc = orte_wait_cb_cancel(child->pid))) { /* no need to error_log this - it just means that the pid is already gone */ goto MOVEON; @@ -1638,6 +1880,18 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state, opal_show_help("help-odls-default.txt", "odls-default:could-not-send-kill", true, orte_system_info.nodename, child->pid, err); + /* check the proc state - ensure it is in one of the termination + * states so that we properly wakeup + */ + if (ORTE_PROC_STATE_UNDEF == child->state || + ORTE_PROC_STATE_INIT == child->state || + ORTE_PROC_STATE_LAUNCHED == child->state || + ORTE_PROC_STATE_RUNNING == child->state) { + /* we can't be sure what happened, but make sure we + * at least have a value that will let us eventually wakeup + */ + child->state = ORTE_PROC_STATE_TERMINATED; + } goto MOVEON; } @@ -1654,46 +1908,37 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state, true, orte_system_info.nodename, child->pid); } } + child->state = ORTE_PROC_STATE_ABORTED_BY_SIG; /* we may have sent it, but that's what happened */ + goto RECORD; MOVEON: /* set the process to "not alive" */ child->alive = false; - - /* add this proc to the local list */ - proc = OBJ_NEW(orte_namelist_t); - if (ORTE_SUCCESS != (rc = 
orte_dss.copy((void**)&(proc->name), child->name, ORTE_NAME))) { + +RECORD: + /* store the child in the alert buffer */ + if (ORTE_SUCCESS != (rc = pack_state_for_proc(&alert, false, child))) { ORTE_ERROR_LOG(rc); - opal_condition_signal(&orte_odls_globals.cond); - OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); - return rc; } - opal_list_append(&procs_killed, &proc->item); - - /* release the object since we killed it */ - OBJ_RELEASE(child); } - /* we are done with the global list, so we can now release + /* if set_state, alert the HNP to what happened */ + if (set_state) { + if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &alert, ORTE_RML_TAG_PLM, 0))) { + ORTE_ERROR_LOG(rc); + } else { + rc = ORTE_SUCCESS; /* need to reset this to success since we return rc */ + } + } + + /* we are done with the global list, so we can now release * any waiting threads - this also allows any callbacks to work */ opal_condition_signal(&orte_odls_globals.cond); OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex); + +CLEANUP: + OBJ_DESTRUCT(&alert); - /* deconstruct the local list and update the process states on the registry, if indicated */ - while (NULL != (item = opal_list_remove_first(&procs_killed))) { - proc = (orte_namelist_t*)item; - if (set_state) { - if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(proc->name, ORTE_PROC_STATE_TERMINATED, exit_status))) { - ORTE_ERROR_LOG(rc); - /* don't exit out even if this didn't work - we still might need to kill more - * processes, so just keep trucking - */ - } - } - OBJ_RELEASE(proc); - } - - OBJ_DESTRUCT(&procs_killed); - - return ORTE_SUCCESS; + return rc; } diff --git a/orte/mca/odls/base/odls_base_open.c b/orte/mca/odls/base/odls_base_open.c index 945f36d176..21b0ffaa07 100644 --- a/orte/mca/odls/base/odls_base_open.c +++ b/orte/mca/odls/base/odls_base_open.c @@ -18,7 +18,7 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" @@ -27,10 
+27,11 @@ #include "opal/util/trace.h" #include "opal/util/argv.h" -#include "orte/dss/dss.h" -#include "orte/util/proc_info.h" +#include "opal/dss/dss.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/smr/smr_types.h" +#include "orte/mca/plm/plm_types.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/odls/base/odls_private.h" @@ -49,22 +50,6 @@ */ orte_odls_base_module_t orte_odls; -/* instance the app_context list object */ -static void orte_odls_app_context_constructor(orte_odls_app_context_t *ptr) -{ - ptr->environ_copy = NULL; -} -static void orte_odls_app_context_destructor(orte_odls_app_context_t *ptr) -{ - if (NULL != ptr->environ_copy) { - opal_argv_free(ptr->environ_copy); - } -} -OBJ_CLASS_INSTANCE(orte_odls_app_context_t, - opal_list_item_t, - orte_odls_app_context_constructor, - orte_odls_app_context_destructor); - /* instance the child list object */ static void orte_odls_child_constructor(orte_odls_child_t *ptr) @@ -74,15 +59,20 @@ static void orte_odls_child_constructor(orte_odls_child_t *ptr) ptr->pid = 0; ptr->app_idx = -1; ptr->alive = false; - ptr->state = ORTE_PROC_STATE_UNDEF; + /* set the default state to "failed to start" so + * we can correctly report should something + * go wrong during launch + */ + ptr->state = ORTE_PROC_STATE_FAILED_TO_START; ptr->exit_code = 0; ptr->cpu_set = 0xffffffff; - ptr->sync_required = false; + ptr->rml_uri = NULL; } static void orte_odls_child_destructor(orte_odls_child_t *ptr) { if (NULL != ptr->name) free(ptr->name); + if (NULL != ptr->rml_uri) free(ptr->rml_uri); } OBJ_CLASS_INSTANCE(orte_odls_child_t, opal_list_item_t, @@ -101,41 +91,14 @@ orte_odls_globals_t orte_odls_globals; */ int orte_odls_base_open(void) { - int param, value, rc; - orte_data_type_t tmp; - - OPAL_TRACE(5); - - /* Debugging / verbose output */ - - param = mca_base_param_reg_int_name("odls", "base_verbose", - "Verbosity level for the odls 
framework", - false, false, 0, &value); - if (value != 0) { - orte_odls_globals.output = opal_output_open(NULL); - } else { - orte_odls_globals.output = -1; - } + /* Debugging / verbose output. Always have stream open, with + verbose set by the mca open system... */ + orte_odls_globals.output = opal_output_open(NULL); mca_base_param_reg_int_name("odls", "base_sigkill_timeout", "Time to wait for a process to die after issuing a kill signal to it", false, false, 1, &orte_odls_globals.timeout_before_sigkill); - /* register the daemon cmd data type */ - tmp = ORTE_DAEMON_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_odls_pack_daemon_cmd, - orte_odls_unpack_daemon_cmd, - (orte_dss_copy_fn_t)orte_odls_copy_daemon_cmd, - (orte_dss_compare_fn_t)orte_odls_compare_daemon_cmd, - (orte_dss_size_fn_t)orte_odls_size_daemon_cmd, - (orte_dss_print_fn_t)orte_odls_print_daemon_cmd, - (orte_dss_release_fn_t)orte_odls_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_DAEMON_CMD", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* initialize globals */ OBJ_CONSTRUCT(&orte_odls_globals.mutex, opal_mutex_t); OBJ_CONSTRUCT(&orte_odls_globals.cond, opal_condition_t); diff --git a/orte/mca/odls/base/odls_base_purge_params.c b/orte/mca/odls/base/odls_base_purge_params.c deleted file mode 100644 index 7bbb94f5a5..0000000000 --- a/orte/mca/odls/base/odls_base_purge_params.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/util/opal_environ.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/mca/odls/base/odls_private.h" - - -/* Purge mca params not suitable for application procs */ -void orte_odls_base_purge_mca_params(char ***env) -{ - char *var; - - /* tell critical frameworks to only use their proxy components */ - var = mca_base_param_environ_variable("rds",NULL,NULL); - opal_setenv(var, "proxy", true, env); - free(var); - var = mca_base_param_environ_variable("ras",NULL,NULL); - opal_setenv(var, "proxy", true, env); - free(var); - var = mca_base_param_environ_variable("rmaps",NULL,NULL); - opal_setenv(var, "proxy", true, env); - free(var); - var = mca_base_param_environ_variable("pls",NULL,NULL); - opal_setenv(var, "proxy", true, env); - free(var); - var = mca_base_param_environ_variable("rmgr",NULL,NULL); - opal_setenv(var, "proxy", true, env); - free(var); -} diff --git a/orte/mca/odls/base/odls_base_select.c b/orte/mca/odls/base/odls_base_select.c index 97d8ecbf4d..aa0e6ea67a 100644 --- a/orte/mca/odls/base/odls_base_select.c +++ b/orte/mca/odls/base/odls_base_select.c @@ -18,13 +18,11 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" -#include "orte/mca/errmgr/errmgr.h" - #include "orte/mca/odls/base/base.h" diff --git a/orte/mca/odls/base/odls_base_state.c b/orte/mca/odls/base/odls_base_state.c index 170d2f6b94..94512705f9 100644 --- a/orte/mca/odls/base/odls_base_state.c +++ b/orte/mca/odls/base/odls_base_state.c @@ -19,7 +19,7 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #ifdef HAVE_SYS_PARAM_H #include @@ -31,15 +31,13 @@ #include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/util/trace.h" - -#include "orte/util/sys_info.h" -#include 
"orte/mca/gpr/gpr.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/smr/smr.h" -#include "orte/dss/dss.h" - #include "opal/util/show_help.h" #include "opal/util/basename.h" + +#include "orte/util/sys_info.h" +#include "orte/util/name_fns.h" +#include "orte/mca/errmgr/errmgr.h" + #include "orte/mca/filem/filem.h" #include "orte/mca/filem/base/base.h" @@ -47,94 +45,6 @@ #include "orte/mca/odls/base/odls_private.h" -/* - * Function for reporting the state and other process-related info - * for newly spawned child processes - */ -int orte_odls_base_report_spawn(opal_list_t *children) -{ - opal_list_item_t *item; - orte_odls_child_t *child; - char **tokens, *segment; - orte_std_cntr_t num_tokens; - orte_gpr_addr_mode_t mode = ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND | ORTE_GPR_KEYS_OR; - orte_data_value_t dval = ORTE_DATA_VALUE_EMPTY; - orte_buffer_t *buffer; - int rc; - - buffer = OBJ_NEW(orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_gpr.begin_compound_cmd(buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - return rc; - } - - for (item = opal_list_get_first(children); - item != opal_list_get_end(children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - - if (ORTE_PROC_STATE_LAUNCHED == child->state) { - /* when we launch the child, we need to store the pid - * in addition to setting the state. 
Be sure to store - * the pid first, though, as setting the state can - * cause triggers to fire - */ - if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&tokens, &num_tokens, child->name))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, child->name->jobid))) { - ORTE_ERROR_LOG(rc); - opal_argv_free(tokens); - OBJ_RELEASE(buffer); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_dss.set(&dval, (void*)&(child->pid), ORTE_PID))) { - ORTE_ERROR_LOG(rc); - opal_argv_free(tokens); - free(segment); - OBJ_RELEASE(buffer); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_gpr.put_1(mode, segment, tokens, ORTE_PROC_LOCAL_PID_KEY, &dval))) { - ORTE_ERROR_LOG(rc); - opal_argv_free(tokens); - free(segment); - OBJ_RELEASE(buffer); - return rc; - } - dval.data = NULL; - opal_argv_free(tokens); - free(segment); - - /* now set the process state to LAUNCHED */ - if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(child->name, ORTE_PROC_STATE_LAUNCHED, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - return rc; - } - } else if (ORTE_PROC_STATE_FAILED_TO_START == child->state) { - if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(child->name, ORTE_PROC_STATE_FAILED_TO_START, child->exit_code))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - return rc; - } - } - } - - if (ORTE_SUCCESS != (rc = orte_gpr.exec_compound_cmd(buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - return rc; - } - OBJ_RELEASE(buffer); - - /* All done */ - return ORTE_SUCCESS; -} - /* * Preload all files for a single app context */ @@ -161,16 +71,18 @@ int orte_odls_base_preload_files_app_context(orte_app_context_t* app_context) /* Define the process set */ p_set = OBJ_NEW(orte_filem_base_process_set_t); - if( NULL == orte_process_info.gpr_replica ) { - p_set->source.jobid = orte_process_info.my_name->jobid; - p_set->source.vpid = orte_process_info.my_name->vpid; + if( orte_process_info.hnp ) { + /* if I am 
the HNP, then use me as the source */ + p_set->source.jobid = ORTE_PROC_MY_NAME->jobid; + p_set->source.vpid = ORTE_PROC_MY_NAME->vpid; } else { - p_set->source.jobid = orte_process_info.gpr_replica->jobid; - p_set->source.vpid = orte_process_info.gpr_replica->vpid; + /* otherwise, set the HNP as the source */ + p_set->source.jobid = ORTE_PROC_MY_HNP->jobid; + p_set->source.vpid = ORTE_PROC_MY_HNP->vpid; } - p_set->sink.jobid = orte_process_info.my_name->jobid; - p_set->sink.vpid = orte_process_info.my_name->vpid; + p_set->sink.jobid = ORTE_PROC_MY_NAME->jobid; + p_set->sink.vpid = ORTE_PROC_MY_NAME->vpid; opal_list_append(&(filem_request->process_sets), &(p_set->super) ); diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h index cd8710ef74..8f62f5cc5f 100644 --- a/orte/mca/odls/base/odls_private.h +++ b/orte/mca/odls/base/odls_private.h @@ -25,24 +25,21 @@ * includes */ #include "orte_config.h" +#include "orte/types.h" #include "opal/class/opal_list.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" -#include "orte/dss/dss_types.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/rmgr/rmgr_types.h" -#include "orte/mca/smr/smr_types.h" +#include "opal/dss/dss_types.h" +#include "orte/mca/plm/plm_types.h" #include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/rml/rml_types.h" -#include "orte/mca/gpr/gpr_types.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/odls/odls_types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /* * General ODLS types @@ -61,24 +58,11 @@ typedef struct orte_odls_child_t { orte_std_cntr_t app_idx; /* index of the app_context for this proc */ bool alive; /* is this proc alive? 
*/ orte_proc_state_t state; /* the state of the process */ - int exit_code; /* process exit code */ + orte_exit_code_t exit_code; /* process exit code */ unsigned long cpu_set; - bool sync_required; /* require sync before termination */ + char *rml_uri; /* contact info for this child */ } orte_odls_child_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_odls_child_t); - -/* - * List object to locally store app_contexts returned by the - * registry subscription. Since we don't know how many app_contexts will - * be returned, we need to store them on a list. - */ -typedef struct orted_odls_app_context_t { - opal_list_item_t super; /* required to place this on a list */ - orte_app_context_t *app_context; - char **environ_copy; /* the environment for this app_context */ -} orte_odls_app_context_t; -OBJ_CLASS_DECLARATION(orte_odls_app_context_t); - typedef struct orte_odls_globals_t { /** Verbose/debug output stream */ @@ -95,21 +79,17 @@ typedef struct orte_odls_globals_t { ORTE_DECLSPEC extern orte_odls_globals_t orte_odls_globals; -ORTE_DECLSPEC int orte_odls_base_report_spawn(opal_list_t *children); - -ORTE_DECLSPEC void orte_odls_base_purge_mca_params(char ***env); - /* * Default functions that are common to most environments - can * be overridden by specific environments if they need something * different (e.g., bproc) */ ORTE_DECLSPEC int -orte_odls_base_default_get_add_procs_data(orte_gpr_notify_data_t **data, - orte_job_map_t *map); +orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, + orte_jobid_t job); ORTE_DECLSPEC int -orte_odls_base_default_construct_child_list(orte_gpr_notify_data_t *data, +orte_odls_base_default_construct_child_list(opal_buffer_t *data, orte_jobid_t *job, orte_std_cntr_t *num_local_procs, orte_vpid_t *vpid_range, @@ -117,7 +97,8 @@ orte_odls_base_default_construct_child_list(orte_gpr_notify_data_t *data, bool *node_included, bool *oversubscribed, bool *override_oversubscribed, - opal_list_t *app_context_list); + orte_std_cntr_t 
*num_contexts, + orte_app_context_t ***app_contexts); /* define a function that will fork a local proc */ typedef int (*orte_odls_base_fork_local_proc_fn_t)(orte_app_context_t *context, @@ -125,7 +106,9 @@ typedef int (*orte_odls_base_fork_local_proc_fn_t)(orte_app_context_t *context, char **environ_copy); ORTE_DECLSPEC int -orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_context_list, +orte_odls_base_default_launch_local(orte_jobid_t job, + orte_std_cntr_t num_apps, + orte_app_context_t **apps, orte_std_cntr_t num_local_procs, orte_vpid_t vpid_range, orte_std_cntr_t total_slots_allocated, @@ -134,12 +117,7 @@ orte_odls_base_default_launch_local(orte_jobid_t job, opal_list_t *app_context_l orte_odls_base_fork_local_proc_fn_t fork_local); ORTE_DECLSPEC int -orte_odls_base_default_extract_proc_map_info(orte_process_name_t *daemon, - opal_list_t *proc_list, - orte_gpr_value_t *value); - -ORTE_DECLSPEC int -orte_odls_base_default_deliver_message(orte_jobid_t job, orte_buffer_t *buffer, orte_rml_tag_t tag); +orte_odls_base_default_deliver_message(orte_jobid_t job, opal_buffer_t *buffer, orte_rml_tag_t tag); ORTE_DECLSPEC void odls_base_default_wait_local_proc(pid_t pid, int status, void* cbdata); @@ -161,40 +139,13 @@ orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state, orte_odls_base_kill_local_fn_t kill_local, orte_odls_base_child_died_fn_t child_died); -ORTE_DECLSPEC int orte_odls_base_default_require_sync(orte_process_name_t *proc); - -/* - * data type functions - */ - -ORTE_DECLSPEC int -orte_odls_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_flag_t *value2, orte_data_type_t type); - -ORTE_DECLSPEC int -orte_odls_copy_daemon_cmd(orte_daemon_cmd_flag_t **dest, orte_daemon_cmd_flag_t *src, orte_data_type_t type); - -ORTE_DECLSPEC int -orte_odls_pack_daemon_cmd(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -ORTE_DECLSPEC int -orte_odls_print_daemon_cmd(char 
**output, char *prefix, orte_daemon_cmd_flag_t *src, orte_data_type_t type); - -ORTE_DECLSPEC void orte_odls_std_release(orte_data_value_t *value); - -ORTE_DECLSPEC int -orte_odls_size_daemon_cmd(size_t *size, orte_daemon_cmd_flag_t *src, orte_data_type_t type); - -ORTE_DECLSPEC int -orte_odls_unpack_daemon_cmd(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); +ORTE_DECLSPEC int orte_odls_base_default_require_sync(orte_process_name_t *proc, opal_buffer_t *buf); /* * Preload binary/files functions */ ORTE_DECLSPEC int orte_odls_base_preload_files_app_context(orte_app_context_t* context); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS + #endif diff --git a/orte/mca/odls/bproc/Makefile.am b/orte/mca/odls/bproc/Makefile.am deleted file mode 100644 index 0024eb30c5..0000000000 --- a/orte/mca/odls/bproc/Makefile.am +++ /dev/null @@ -1,48 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include $(odls_bproc_CPPFLAGS) - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if OMPI_BUILD_odls_bproc_DSO -component_noinst = -component_install = mca_odls_bproc.la -else -component_noinst = libmca_odls_bproc.la -component_install = -endif - -sources = \ - odls_bproc.h \ - odls_bproc.c \ - odls_bproc_component.c - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_odls_bproc_la_SOURCES = $(sources) -mca_odls_bproc_la_LIBADD = $(odls_bproc_LIBS) -mca_odls_bproc_la_LDFLAGS = -module -avoid-version $(odls_bproc_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_odls_bproc_la_SOURCES = $(sources) -libmca_odls_bproc_la_LIBADD = $(odls_bproc_LIBS) -libmca_odls_bproc_la_LDFLAGS = -module -avoid-version $(odls_bproc_LDFLAGS) diff --git a/orte/mca/odls/bproc/configure.m4 b/orte/mca/odls/bproc/configure.m4 deleted file mode 100644 index c042c4689d..0000000000 --- a/orte/mca/odls/bproc/configure.m4 +++ /dev/null @@ -1,38 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_odls_bproc_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_odls_bproc_CONFIG],[ - OMPI_CHECK_BPROC([odls_bproc], [odls_bproc_good=1], - [odls_bproc_good=1], [odls_bproc_good=0]) - - # if check worked, set wrapper flags if so. 
- # Evaluate succeed / fail - AS_IF([test "$odls_bproc_good" = "1"], - [odls_bproc_WRAPPER_EXTRA_LDFLAGS="$odls_bproc_LDFLAGS" - odls_bproc_WRAPPER_EXTRA_LIBS="$odls_bproc_LIBS" - $1], - [$2]) - - # set build flags to use in makefile - AC_SUBST([odls_bproc_CPPFLAGS]) - AC_SUBST([odls_bproc_LDFLAGS]) - AC_SUBST([odls_bproc_LIBS]) -])dnl diff --git a/orte/mca/odls/bproc/configure.params b/orte/mca/odls/bproc/configure.params deleted file mode 100644 index 3513f8d956..0000000000 --- a/orte/mca/odls/bproc/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/odls/bproc/odls_bproc.c b/orte/mca/odls/bproc/odls_bproc.c deleted file mode 100644 index 1f72af2420..0000000000 --- a/orte/mca/odls/bproc/odls_bproc.c +++ /dev/null @@ -1,872 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. 
- * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file: - * Part of the bproc launcher. - * See odls_bproc.h for an overview of how it works. - */ -#include "orte_config.h" -#include -#include -#include -#include -#include -#include - -#include "opal/mca/base/mca_base_param.h" -#include "opal/runtime/opal_progress.h" -#include "opal/threads/condition.h" -#include "opal/util/os_dirpath.h" -#include "opal/util/os_path.h" -#include "opal/util/output.h" - -#include "orte/dss/dss.h" -#include "orte/util/sys_info.h" -#include "orte/orte_constants.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/iof/iof.h" -#include "orte/mca/iof/base/iof_base_setup.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/util/session_dir.h" -#include "orte/util/univ_info.h" - -#include "orte/mca/odls/base/odls_private.h" -#include "odls_bproc.h" - -/** - * Initialization of the bproc_orted module with all the needed function pointers - */ -orte_odls_base_module_t orte_odls_bproc_module = { - orte_odls_bproc_get_add_procs_data, - orte_odls_bproc_launch_local_procs, - orte_odls_bproc_kill_local_procs, - orte_odls_bproc_signal_local_procs, - orte_odls_bproc_deliver_message, - orte_odls_bproc_get_local_proc_names -}; - -static int odls_bproc_make_dir(char *directory); -static char * odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid, - orte_std_cntr_t app_context); -static void odls_bproc_delete_dir_tree(char * path); -static int odls_bproc_remove_dir(void); -static void odls_bproc_send_cb(int status, orte_process_name_t * peer, - orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); -static int odls_bproc_setup_stdio(orte_process_name_t *proc_name, - int proc_rank, orte_jobid_t jobid, - orte_std_cntr_t app_context, bool connect_stdin); - - -int orte_odls_bproc_get_add_procs_data(orte_gpr_notify_data_t 
**data, orte_job_map_t *map) -{ - orte_gpr_notify_data_t *ndat; - orte_gpr_value_t **values, *value; - orte_std_cntr_t cnt; - opal_list_item_t *item, *m_item; - orte_mapped_node_t *node; - orte_mapped_proc_t *proc; - int rc; - - /* set default answer */ - *data = NULL; - - ndat = OBJ_NEW(orte_gpr_notify_data_t); - if (NULL == ndat) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* construct a fake trigger name so that the we can extract the jobid from it later */ - if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(ndat->target), "bogus", map->job))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* our required info is in the mapped_node objects, so all we - * have to do is transfer it over - */ - for (m_item = opal_list_get_first(&map->nodes); - m_item != opal_list_get_end(&map->nodes); - m_item = opal_list_get_next(m_item)) { - node = (orte_mapped_node_t*)m_item; - - for (item = opal_list_get_first(&node->procs); - item != opal_list_get_end(&node->procs); - item = opal_list_get_next(item)) { - proc = (orte_mapped_proc_t*)item; - - /* must not have any tokens so that launch_procs can process it correctly */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, 0, "bogus", 5, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), - ORTE_PROC_NAME_KEY, - ORTE_NAME, &proc->name))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), - ORTE_PROC_APP_CONTEXT_KEY, - ORTE_STD_CNTR, &proc->app_idx))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), - ORTE_NODE_NAME_KEY, - ORTE_STRING, node->nodename))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return 
rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), - ORTE_PROC_LOCAL_RANK_KEY, - ORTE_VPID, &proc->local_rank))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), - ORTE_NODE_NUM_PROCS_KEY, - ORTE_STD_CNTR, &node->num_procs))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&cnt, ndat->values, value))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(values[0]); - return rc; - } - ndat->cnt += 1; - } - } - - *data = ndat; - return ORTE_SUCCESS; -} - - -/** - * Creates the passed directory. If the directory already exists, it and its - * contents will be deleted then the directory will be created. - * @param directory The directory to be created. - * @retval ORTE_SUCCESS - * @retval error - */ -static int -odls_bproc_make_dir(char *directory) -{ - struct stat buf; - mode_t my_mode = S_IRWXU; /* at the least, I need to be able to do anything */ - - if (0 == stat(directory, &buf)) { /* exists - delete it and its contents */ - odls_bproc_delete_dir_tree(directory); - } - /* try to create it with proper mode */ - return(opal_os_dirpath_create(directory, my_mode)); -} - - -/** - * Returns a path of the form: - * @code - * /tmp/openmpi-bproc-//-// - * @endcode - * which is used to put links to the pty/pipes in - * @param proc_rank the process's rank on the node - * @param jobid the jobid the proc belongs to - * @param app_context the application context number within the job - * @retval path - */ -static char * - odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid, - orte_std_cntr_t app_context) -{ - char *path = NULL, *user = NULL, *job = NULL; - int rc; - - /* ensure that system info is set */ - orte_sys_info(); - - if (NULL == orte_universe_info.name) { /* error condition */ - ORTE_ERROR_LOG(ORTE_ERROR); - return NULL; - } 
- - rc = orte_ns.convert_jobid_to_string(&job, jobid); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return NULL; - } - - /* get the username set by the bproc pls. We need to get it from here - * because on many bproc systems the method we use to get the username - * from the system on the backend fails and we only get the uid. */ - rc = mca_base_param_register_string("pls", "bproc", "username", NULL, - orte_system_info.user); - mca_base_param_lookup_string(rc,&user); - - if (0 > asprintf(&path, OPAL_PATH_SEP"tmp"OPAL_PATH_SEP"openmpi-bproc-%s"OPAL_PATH_SEP"%s"OPAL_PATH_SEP"%s-%d"OPAL_PATH_SEP"%d", - user, orte_universe_info.name, - job, (int) app_context, proc_rank)) { - ORTE_ERROR_LOG(ORTE_ERROR); - path = NULL; - } - if(0 < mca_odls_bproc_component.debug) { - opal_output(0, "odls bproc io setup. Path: %s\n", path); - } - free(user); - free(job); - return path; -} - - -/** - * deletes the passed directory tree recursively - * @param path the path to the base directory to delete - */ -static void -odls_bproc_delete_dir_tree(char * path) -{ - DIR *dp; - struct dirent *ep; - char *filenm; - int ret; - struct stat buf; - dp = opendir(path); - if (NULL == dp) { - return; - } - - while (NULL != (ep = readdir(dp)) ) { - /* skip: . and .. */ - if ((0 != strcmp(ep->d_name, ".")) && (0 != strcmp(ep->d_name, ".."))) { - filenm = opal_os_path(false, path, ep->d_name, NULL); - ret = stat(filenm, &buf); - if (ret < 0 || S_ISDIR(buf.st_mode)) { - odls_bproc_delete_dir_tree(filenm); - free(filenm); - continue; - } - unlink(filenm); - free(filenm); - } - } - closedir(dp); - rmdir(path); -} - - -/** - * Removes the bproc directory - * @code /tmp/openmpi-bproc-/ @endcode and all of its contents - * @retval ORTE_SUCCESS - * @retval error - */ -static int -odls_bproc_remove_dir() -{ - char *frontend = NULL, *user = NULL, *filename = NULL; - int id; - - /* get the username set by the bproc pls. 
We need to get it from here - * because on many bproc systems the method we use to get the username - * from the system on the backend fails and we only get the uid. */ - id = mca_base_param_register_string("pls", "bproc", "username", NULL, - orte_system_info.user); - mca_base_param_lookup_string(id,&user); - asprintf(&filename, "openmpi-bproc-%s", user ); - if( NULL == filename ) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERROR; - } - frontend = opal_os_path(false, "tmp", filename, NULL ); - free(filename); /* Always free the filename */ - if (NULL == frontend) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERROR; - } - /* we do our best to clean up the directory tree, but we ignore errors*/ - odls_bproc_delete_dir_tree(frontend); - free(frontend); - return ORTE_SUCCESS; -} - - -/** - * Callback function for when we tell mpirun we are ready - * @param status - * @param peer - * @param buffer - * @param tag - * @param cbdata - */ -static void -odls_bproc_send_cb(int status, orte_process_name_t * peer, - orte_buffer_t* buffer, - orte_rml_tag_t tag, void* cbdata) -{ - OBJ_RELEASE(buffer); -} - - -/** - * Create Standard I/O symlinks in the filesystem for a given proc - * - * Create Standard I/O symlinks in the filesystem for a given proc. - * The symlinks will be placed in: - * @code - * /tmp/openmpi-bproc-//-// - * @endcode - * - * The symlinks will be to FIFOs for stdin and stderr. stdout will either - * be to a FIFO or pty, depending on the configuration of Open MPI. 
- * - * @param proc_rank the process's rank on the node - * @param jobid the jobid the proc belongs to - * @param app_context the application context number within the job - * @param connect_stdin if true, stdin will be connected, otherwise it will be - * set to /dev/null - * - * @retval ORTE_SUCCESS - * @retval error - */ -static int -odls_bproc_setup_stdio(orte_process_name_t *proc_name, int proc_rank, - orte_jobid_t jobid, - orte_std_cntr_t app_context, bool connect_stdin) -{ - char *path_prefix, *fd_link_path = NULL; - int rc = ORTE_SUCCESS, fd; -#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0) - int amaster, aslave; - char pty_name[256]; - struct termios term_attrs; -#endif - - path_prefix = odls_bproc_get_base_dir_name(proc_rank, jobid, (size_t)app_context); - if (NULL == path_prefix) { - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* check for existence and access, or create it */ - if (ORTE_SUCCESS != (rc = odls_bproc_make_dir(path_prefix))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* setup the stdin FIFO. 
Always use a fifo for the same reason we - always use a pipe in the iof_setup code -- don't want to flush - onto the floor during close */ - fd_link_path = opal_os_path( false, path_prefix, "0", NULL ); - if (NULL == fd_link_path) { - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - if (connect_stdin) { - if (0 != mkfifo(fd_link_path, S_IRWXU)) { - perror("odls_bproc mkfifo failed"); - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - fd = open(fd_link_path, O_RDWR); - if (-1 == fd) { - perror("odls_bproc open failed"); - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - orte_iof.iof_publish(proc_name, ORTE_IOF_SINK, - ORTE_IOF_STDIN, fd); - } else { - if(0 != symlink("/dev/null", fd_link_path)) { - perror("odls_bproc could not create symlink"); - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - } - - free(fd_link_path); - fd_link_path = NULL; - - /* setup the stdout PTY / FIFO */ - fd_link_path = opal_os_path( false, path_prefix, "1", NULL ); - if (NULL == fd_link_path) { - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - -#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0) - if (0 != openpty(&amaster, &aslave, pty_name, NULL, NULL)) { - opal_output(0, "odls_bproc: openpty failed, using pipes instead"); - goto stdout_fifo_setup; - } - - if (0 != symlink(pty_name, fd_link_path)) { - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - if (tcgetattr(aslave, &term_attrs) < 0) { - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - term_attrs.c_lflag &= ~ (ECHO | ECHOE | ECHOK | - ECHOCTL | ECHOKE | ECHONL); - term_attrs.c_iflag &= ~ (ICRNL | INLCR | ISTRIP | INPCK | IXON); - term_attrs.c_oflag &= ~ (OCRNL | ONLCR); - if (tcsetattr(aslave, TCSANOW, &term_attrs) == -1) { - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE, - ORTE_IOF_STDOUT, amaster); - - goto stderr_fifo_setup; - -stdout_fifo_setup: -#endif - - if (0 
!= mkfifo(fd_link_path, S_IRWXU)) { - perror("odls_bproc mkfifo failed"); - rc = ORTE_ERROR; - goto cleanup; - } - - fd = open(fd_link_path, O_RDWR); - if (-1 == fd) { - perror("odls_bproc open failed"); - rc = ORTE_ERROR; - goto cleanup; - } - - orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE, - ORTE_IOF_STDOUT, fd); - -#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0) -stderr_fifo_setup: -#endif - - free(fd_link_path); - fd_link_path = NULL; - - /* setup the stderr FIFO. Always a fifo */ - fd_link_path = opal_os_path( false, path_prefix, "2", NULL ); - if (NULL == fd_link_path) { - rc = ORTE_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - if (0 != mkfifo(fd_link_path, S_IRWXU)) { - perror("odls_bproc mkfifo failed"); - rc = ORTE_ERROR; - goto cleanup; - } - - fd = open(fd_link_path, O_RDWR); - if (-1 == fd) { - perror("odls_bproc open failed"); - rc = ORTE_ERROR; - goto cleanup; - } - - orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE, - ORTE_IOF_STDERR, fd); - -cleanup: - if (NULL != path_prefix) { - free(path_prefix); - } - if (NULL != fd_link_path) { - free(fd_link_path); - } - return rc; -} - - -/** - * Setup io for the current node, then tell orterun we are ready for the actual - * processes. 
- * @retval ORTE_SUCCESS - * @retval error - */ -int -orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data) -{ - odls_bproc_child_t *child; - opal_list_item_t* item; - orte_gpr_value_t *value, **values; - orte_gpr_keyval_t *kval; - char *node_name; - int rc; - orte_std_cntr_t i, j, kv, kv2, *sptr; - int src = 0; - orte_buffer_t *ack; - bool connect_stdin; - orte_jobid_t jobid; - int cycle = 0; - char *job_str=NULL, *vpid_str, *uri_file, *my_uri=NULL, *session_dir=NULL; - FILE *fp; - orte_vpid_t *vptr; - bool node_included; - - /* first, retrieve the job number we are to launch from the - * returned data - we can extract the jobid directly from the - * subscription name we created - */ - if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&jobid, data->target))) { - ORTE_ERROR_LOG(rc); - src = rc; - goto CALLHOME; - } - - /** - * hack for bproc4, change process group so that we do not receive signals - * from the parent/front-end process, as bproc4 does not currently allow the - * process to intercept the signal - */ - setpgid(0,0); - - /* set the flag indicating this node is not included in the launch data */ - node_included = false; - - /* loop through the returned data to find the global info and - * the info for processes going onto this node - */ - values = (orte_gpr_value_t**)(data->values)->addr; - for (j=0, i=0; i < data->cnt && j < (data->values)->size; j++) { /* loop through all returned values */ - if (NULL != values[j]) { - i++; - value = values[j]; - /* this must have come from one of the process containers, so it must - * contain data for a proc structure - see if it belongs to this node - */ - for (kv=0; kv < value->cnt; kv++) { - kval = value->keyvals[kv]; - if (strcmp(kval->key, ORTE_NODE_NAME_KEY) == 0) { - /* Most C-compilers will bark if we try to directly compare the string in the - * kval data area against a regular string, so we need to "get" the data - * so we can access it */ - if (ORTE_SUCCESS != (rc = 
orte_dss.get((void**)&node_name, kval->value, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - src = rc; - goto CALLHOME; - } - /* if this is our node...must also protect against a zero-length string */ - if (NULL != node_name && 0 == strcmp(node_name, orte_system_info.nodename)) { - /* indicate that there is something for us to do */ - node_included = true; - - /* setup and populate the child object */ - child = OBJ_NEW(odls_bproc_child_t); - for (kv2 = 0; kv2 < value->cnt; kv2++) { - kval = value->keyvals[kv2]; - if(strcmp(kval->key, ORTE_PROC_NAME_KEY) == 0) { - /* copy the name into the child object */ - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(child->name), kval->value->data, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - src = rc; - goto CALLHOME; - } - continue; - } - if(strcmp(kval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - src = rc; - goto CALLHOME; - } - child->app_idx = *sptr; /* save the index into the app_context objects */ - continue; - } - if(strcmp(kval->key, ORTE_PROC_LOCAL_RANK_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, kval->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - src = rc; - goto CALLHOME; - } - child->local_rank = *vptr; /* save the local_rank */ - continue; - } - if(strcmp(kval->key, ORTE_NODE_NUM_PROCS_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - src = rc; - goto CALLHOME; - } - child->num_procs = *sptr; /* save the number of procs from this job on this node */ - continue; - } - } /* kv2 */ - /* protect operation on the global list of children */ - OPAL_THREAD_LOCK(&mca_odls_bproc_component.lock); - opal_list_append(&mca_odls_bproc_component.children, &child->super); - opal_condition_signal(&mca_odls_bproc_component.cond); - OPAL_THREAD_UNLOCK(&mca_odls_bproc_component.lock); - - } - } - } /* for kv */ - } /* for j */ - } - - /* if 
there is nothing for us to do, we still have to report back - * before we just return - */ - if (!node_included) { - rc = ORTE_SUCCESS; - goto CALLHOME; - } - - /* setup some values we'll need to drop my uri for each child */ - orte_ns.convert_jobid_to_string(&job_str, jobid); - my_uri = orte_rml.get_contact_info(); - - /* set up the io files for our children */ - for(item = opal_list_get_first(&mca_odls_bproc_component.children); - item != opal_list_get_end(&mca_odls_bproc_component.children); - item = opal_list_get_next(item)) { - child = (odls_bproc_child_t *) item; - if(0 < mca_odls_bproc_component.debug) { - opal_output(0, "orte_odls_bproc_launch: setting up io for " - "%s proc rank %ld\n", - ORTE_NAME_PRINT((child->name)), - (long)child->name->vpid); - } - /* only setup to forward stdin if it is rank 0, otherwise connect - * to /dev/null */ - if(0 == child->name->vpid) { - connect_stdin = true; - } else { - connect_stdin = false; - } - - rc = odls_bproc_setup_stdio(child->name, cycle, - jobid, child->app_idx, - connect_stdin); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - src = rc; - goto CALLHOME; - } - - /* record my uri in a file within the session directory so the child can contact me */ - /* get the session dir for this proc */ - orte_ns.convert_vpid_to_string(&vpid_str, child->name->vpid); - - if (ORTE_SUCCESS != (rc = orte_session_dir(true, NULL, NULL, NULL, - NULL, NULL, job_str, vpid_str))) { - ORTE_ERROR_LOG(rc); - src = rc; - goto CALLHOME; - } - - /* get the session dir name so we can put the file there */ - if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&session_dir, NULL, NULL, NULL, - NULL, NULL, NULL, job_str, vpid_str))) { - ORTE_ERROR_LOG(rc); - src = rc; - goto CALLHOME; - } - free(vpid_str); - - /* create the file and put my uri, this child's local rank, and the - * number of local procs into it */ - uri_file = opal_os_path(false, session_dir, "orted-uri.txt", NULL); - fp = fopen(uri_file, "w"); - if (NULL == fp) { - 
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - rc = ORTE_ERR_FILE_OPEN_FAILURE; - src = rc; - goto CALLHOME; - } - fprintf(fp, "%s\n", my_uri); - fprintf(fp, "%ld\n", (long)child->local_rank); - fprintf(fp, "%ld\n", (long)child->num_procs); - fclose(fp); - free(uri_file); - - cycle++; - } - - /* release the jobid string and uri */ - free(job_str); - free(my_uri); - -CALLHOME: - /* message to indicate that we are ready */ - ack = OBJ_NEW(orte_buffer_t); - rc = orte_dss.pack(ack, &src, 1, ORTE_INT); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - } - rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, ack, ORTE_RML_TAG_BPROC, 0, - odls_bproc_send_cb, NULL); - if (0 > rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - rc = ORTE_SUCCESS; - -cleanup: - - return rc; -} - -/** - * Function to terminate a job. Since this component only runs on remote nodes - * and doesn't actually launch any processes, this function is not needed - * so is a noop. - */ -int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state) -{ - orte_iof.iof_flush(); - return ORTE_SUCCESS; -} - -/** - * Function to signal a process. Since this component only runs on remote nodes - * and doesn't actually launch any processes, this function is not needed - * so is a noop. 
- * @param proc the process's name - * @param signal The signal to send - * @retval ORTE_SUCCESS - */ -int orte_odls_bproc_signal_local_procs(const orte_process_name_t* proc, int32_t signal) -{ - orte_iof.iof_flush(); - return ORTE_SUCCESS; -} - - -int orte_odls_bproc_deliver_message(orte_jobid_t job, orte_buffer_t *buffer, orte_rml_tag_t tag) -{ - int rc; - opal_list_item_t *item; - orte_odls_child_t *child; - - /* protect operations involving the global list of children */ - OPAL_THREAD_LOCK(&mca_odls_bproc_component.lock); - - for (item = opal_list_get_first(&mca_odls_bproc_component.children); - item != opal_list_get_end(&mca_odls_bproc_component.children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - - /* do we have a child from the specified job. Because the - * job could be given as a WILDCARD value, we must use - * the dss.compare function to check for equality. - */ - if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { - continue; - } - - /* if so, send the message */ - rc = orte_rml.send_buffer(child->name, buffer, tag, 0); - if (rc < 0) { - ORTE_ERROR_LOG(rc); - } - } - - opal_condition_signal(&mca_odls_bproc_component.cond); - OPAL_THREAD_UNLOCK(&mca_odls_bproc_component.lock); - return ORTE_SUCCESS; -} - - -int orte_odls_bproc_get_local_proc_names(opal_list_t *names, orte_jobid_t job) -{ - opal_list_item_t *item; - orte_odls_child_t *child; - orte_namelist_t *nitem; - - /* protect operations involving the global list of children */ - OPAL_THREAD_LOCK(&mca_odls_bproc_component.lock); - - for (item = opal_list_get_first(&mca_odls_bproc_component.children); - item != opal_list_get_end(&mca_odls_bproc_component.children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - - /* do we have a child from the specified job. Because the - * job could be given as a WILDCARD value, we must use - * the dss.compare function to check for equality. 
- */ - if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { - continue; - } - - /* add this name to the list */ - nitem = OBJ_NEW(orte_namelist_t); - orte_dss.copy((void**)&nitem->name, child->name, ORTE_NAME); - opal_list_append(names, &nitem->item); - } - opal_condition_signal(&mca_odls_bproc_component.cond); - OPAL_THREAD_UNLOCK(&mca_odls_bproc_component.lock); - return ORTE_SUCCESS; -} - - -/** - * Finalizes the bproc module. Cleanup tmp directory/files - * used for I/O forwarding. - * @retval ORTE_SUCCESS - */ -int orte_odls_bproc_finalize(void) -{ - orte_iof.iof_flush(); - odls_bproc_remove_dir(); - orte_session_dir_finalize(orte_process_info.my_name); - return ORTE_SUCCESS; -} - diff --git a/orte/mca/odls/bproc/odls_bproc.h b/orte/mca/odls/bproc/odls_bproc.h deleted file mode 100644 index c214c1ae9b..0000000000 --- a/orte/mca/odls/bproc/odls_bproc.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file: - * Part of the bproc launching system. This launching system is broken into 2 - * parts: one runs under the PLS on the head node to launch the orteds, and the - * other serves as the orted's local launcher. - * - * The main job of this component is to setup ptys/pipes for IO forwarding. - * See pls_bproc.h for an overview of how the entire bproc launching system works. 
- */ -#ifndef ORTE_ODLS_BPROC_H_ -#define ORTE_ODLS_BPROC_H_ - -#include "orte_config.h" - -#include - -#include "opal/mca/mca.h" -#include "opal/threads/condition.h" -#include "opal/class/opal_list.h" - -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/rmaps/rmaps_types.h" - -#include "orte/mca/odls/odls.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_odls_bproc_component_open(void); -int orte_odls_bproc_component_close(void); -int orte_odls_bproc_finalize(void); -orte_odls_base_module_t* orte_odls_bproc_init(int *priority); - -/* - * Startup / Shutdown - */ -int orte_odls_bproc_finalize(void); - -/* - * Interface - */ -int orte_odls_bproc_get_add_procs_data(orte_gpr_notify_data_t **data, orte_job_map_t *map); -int orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data); -int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state); -int orte_odls_bproc_signal_local_procs(const orte_process_name_t* proc_name, int32_t signal); -int orte_odls_bproc_deliver_message(orte_jobid_t job, orte_buffer_t *buffer, orte_rml_tag_t tag); -int orte_odls_bproc_get_local_proc_names(opal_list_t *names, orte_jobid_t job); - -/** - * ODLS bproc_orted component - */ -struct orte_odls_bproc_component_t { - orte_odls_base_component_t super; - /**< The base class */ - int debug; - /**< If greater than 0 print debugging information */ - int priority; - /**< The priority of this component. This will be returned if we determine - * that bproc is available and running on this node, */ - opal_mutex_t lock; - /**< Lock used to prevent some race conditions */ - opal_condition_t cond; - /**< Condition used to wake up waiting threads */ - opal_list_t children; - /**< list of children on this node */ -}; -/** - * Convenience typedef - */ -typedef struct orte_odls_bproc_component_t orte_odls_bproc_component_t; - -/* - * List object to locally store the process names and pids of - * our children. 
This can subsequently be used to order termination - * or pass signals without looking the info up again. - */ -typedef struct odls_bproc_child_t { - opal_list_item_t super; /* required to place this on a list */ - orte_process_name_t *name; /* the OpenRTE name of the proc */ - pid_t pid; /* local pid of the proc */ - orte_std_cntr_t app_idx; /* index of the app_context for this proc */ - bool alive; /* is this proc alive? */ - orte_vpid_t local_rank; /* local rank of this proc */ - orte_std_cntr_t num_procs; /* number of local procs sharing this node */ -} odls_bproc_child_t; -OBJ_CLASS_DECLARATION(odls_bproc_child_t); - -ORTE_MODULE_DECLSPEC extern orte_odls_bproc_component_t mca_odls_bproc_component; -extern orte_odls_base_module_t orte_odls_bproc_module; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* ORTE_ODLS_BPROC_H_ */ - diff --git a/orte/mca/odls/bproc/odls_bproc_component.c b/orte/mca/odls/bproc/odls_bproc_component.c deleted file mode 100644 index 9bd410f52e..0000000000 --- a/orte/mca/odls/bproc/odls_bproc_component.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -/** - * @file: - * Takes care of the component stuff for the MCA. 
- */ -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" - -#include "orte/mca/odls/odls.h" -#include "odls_bproc.h" - -/* instance the child list object */ -static void odls_bproc_child_constructor(odls_bproc_child_t *ptr) -{ - ptr->name = NULL; - ptr->app_idx = -1; - ptr->alive = false; - ptr->local_rank = ORTE_VPID_INVALID; - ptr->num_procs = 0; -} -static void odls_bproc_child_destructor(odls_bproc_child_t *ptr) -{ - if (NULL != ptr->name) free(ptr->name); -} -OBJ_CLASS_INSTANCE(odls_bproc_child_t, - opal_list_item_t, - odls_bproc_child_constructor, - odls_bproc_child_destructor); - -/** - * The bproc component data structure used to store all the relevent data - * about this component. - */ -orte_odls_bproc_component_t mca_odls_bproc_component = { - { - /* First, the mca_component_t struct containing meta information - about the component itself */ - { - /* Indicate that we are a odls v1.3.0 component (which also - implies a specific MCA version) */ - ORTE_ODLS_BASE_VERSION_1_3_0, - /* Component name and version */ - "bproc", - ORTE_MAJOR_VERSION, - ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION, - /* Component open and close functions */ - orte_odls_bproc_component_open, - orte_odls_bproc_component_close - }, - /* Next the MCA v1.0.0 component meta data */ - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - /* Initialization / querying functions */ - orte_odls_bproc_init, - orte_odls_bproc_finalize - } -}; - -/** - * Opens the pls_bproc component, setting all the needed mca parameters and - * finishes setting up the component struct. 
- */ -int orte_odls_bproc_component_open(void) -{ - /* initialize globals */ - OBJ_CONSTRUCT(&mca_odls_bproc_component.lock, opal_mutex_t); - OBJ_CONSTRUCT(&mca_odls_bproc_component.cond, opal_condition_t); - OBJ_CONSTRUCT(&mca_odls_bproc_component.children, opal_list_t); - - /* lookup parameters */ - mca_base_param_reg_int(&mca_odls_bproc_component.super.version, - "priority", NULL, false, false, 100, - &mca_odls_bproc_component.priority); - mca_base_param_reg_int(&mca_odls_bproc_component.super.version, - "debug", "If > 0 prints library debugging information", - false, false, 0, &mca_odls_bproc_component.debug); - return ORTE_SUCCESS; -} - -/** - * Initializes the module. We do not want to run unless we are not the seed, - * bproc is running, and we are not on the master node. - */ -orte_odls_base_module_t *orte_odls_bproc_init(int *priority) -{ - int ret; - struct bproc_version_t version; - - /* the base open/select logic protects us against operation when - * we are NOT in a daemon, so we don't have to check that here - */ - - /* check to see if BProc is running here */ - ret = bproc_version(&version); - if (ret != 0) { - return NULL; - } - - *priority = mca_odls_bproc_component.priority; - return &orte_odls_bproc_module; -} - -/** - * Component close function. 
- */ -int orte_odls_bproc_component_close(void) -{ - OBJ_DESTRUCT(&mca_odls_bproc_component.lock); - OBJ_DESTRUCT(&mca_odls_bproc_component.cond); - OBJ_DESTRUCT(&mca_odls_bproc_component.children); - return ORTE_SUCCESS; -} diff --git a/orte/mca/odls/default/odls_default.h b/orte/mca/odls/default/odls_default.h index 1cf44f33e6..ed2c76ea86 100644 --- a/orte/mca/odls/default/odls_default.h +++ b/orte/mca/odls/default/odls_default.h @@ -26,15 +26,9 @@ #include "opal/mca/mca.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/rmaps/rmaps_types.h" - #include "orte/mca/odls/odls.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /* * Module open / close @@ -54,7 +48,6 @@ int orte_odls_default_finalize(void); extern orte_odls_base_module_t orte_odls_default_module; ORTE_MODULE_DECLSPEC extern orte_odls_base_component_t mca_odls_default_component; -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS + #endif /* ORTE_ODLS_H */ diff --git a/orte/mca/odls/default/odls_default_component.c b/orte/mca/odls/default/odls_default_component.c index 58c633e0fc..06d7fdb236 100644 --- a/orte/mca/odls/default/odls_default_component.c +++ b/orte/mca/odls/default/odls_default_component.c @@ -23,7 +23,7 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include #ifdef HAVE_UNISTD_H @@ -31,15 +31,6 @@ #endif #include -#include "opal/util/argv.h" -#include "opal/util/path.h" -#include "opal/util/basename.h" -#include "opal/util/show_help.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - #include "orte/mca/odls/odls.h" #include "orte/mca/odls/base/odls_private.h" #include "orte/mca/odls/default/odls_default.h" diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 22af8ecc77..98b7886543 100644 --- 
a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -21,7 +21,7 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include #ifdef HAVE_UNISTD_H @@ -63,36 +63,14 @@ #endif #endif /* HAVE_SCHED_YIELD */ -#include "opal/event/event.h" -#include "opal/util/argv.h" #include "opal/util/output.h" -#include "opal/util/os_path.h" #include "opal/util/show_help.h" -#include "opal/util/path.h" -#include "opal/util/basename.h" -#include "opal/util/opal_environ.h" -#include "opal/mca/base/mca_base_param.h" -#include "opal/util/num_procs.h" -#include "opal/util/sys_limits.h" -#include "orte/dss/dss.h" -#include "orte/util/sys_info.h" -#include "orte/util/univ_info.h" -#include "orte/util/session_dir.h" #include "orte/runtime/orte_wait.h" -#include "orte/runtime/params.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/iof/iof.h" #include "orte/mca/iof/base/iof_base_setup.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/sds/base/base.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/routed/routed.h" +#include "orte/util/name_fns.h" #include "orte/mca/odls/base/odls_private.h" #include "orte/mca/odls/default/odls_default.h" @@ -100,7 +78,7 @@ /* * External Interface */ -static int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data); +static int orte_odls_default_launch_local_procs(opal_buffer_t *data); static int orte_odls_default_kill_local_procs(orte_jobid_t job, bool set_state); static int orte_odls_default_signal_local_procs(const orte_process_name_t *proc, int32_t signal); @@ -112,7 +90,6 @@ orte_odls_base_module_t orte_odls_default_module = { orte_odls_default_kill_local_procs, orte_odls_default_signal_local_procs, 
orte_odls_base_default_deliver_message, - orte_odls_base_default_extract_proc_map_info, orte_odls_base_default_require_sync }; @@ -131,12 +108,22 @@ static bool odls_default_child_died(pid_t pid, unsigned int timeout, int *exit_s if (pid == ret) { /* It died -- return success */ return true; + } else if (0 == ret) { + /* with NOHANG specified, if a process has already exited + * while waitpid was registered, then waitpid returns 0 + * as there is no error - this is a race condition problem + * that occasionally causes us to incorrectly report a proc + * as refusing to die. Unfortunately, errno may not be reset + * by waitpid in this case, so we cannot check it - just assume + * the proc has indeed died + */ + return true; } else if (-1 == ret && ECHILD == errno) { /* The pid no longer exists, so we'll call this "good enough for government work" */ return true; } - + #if defined(HAVE_SCHED_YIELD) sched_yield(); #else @@ -183,7 +170,6 @@ static int odls_default_fork_local_proc( orte_odls_child_t *child, char **environ_copy) { - pid_t pid; orte_iof_base_io_conf_t opts; int rc; sigset_t sigs; @@ -226,15 +212,15 @@ static int odls_default_fork_local_proc( } /* Fork off the child */ - pid = fork(); - if(pid < 0) { + child->pid = fork(); + if(child->pid < 0) { ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN); child->state = ORTE_PROC_STATE_FAILED_TO_START; child->exit_code = ORTE_ERR_SYS_LIMITS_CHILDREN; return ORTE_ERR_SYS_LIMITS_CHILDREN; } - if (pid == 0) { + if (child->pid == 0) { long fd, fdmax = sysconf(_SC_OPEN_MAX); /* Setup the pipe to be close-on-exec */ @@ -315,7 +301,11 @@ static int odls_default_fork_local_proc( /* Other errno's are bad */ child->state = ORTE_PROC_STATE_FAILED_TO_START; child->exit_code = ORTE_ERR_PIPE_READ_FAILURE; - opal_output(orte_odls_globals.output, "odls: got code %d back from child", i); + + OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output, + "%s odls:default:fork got code %d back from child", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i)); + 
return ORTE_ERR_PIPE_READ_FAILURE; break; } else if (0 == rc) { @@ -332,14 +322,17 @@ static int odls_default_fork_local_proc( */ child->state = ORTE_PROC_STATE_FAILED_TO_START; child->exit_code = i; - opal_output(orte_odls_globals.output, "odls: got code %d back from child", i); + + OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output, + "%s odls:default:fork got code %d back from child", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i)); + return i; } } - /* set the proc state to LAUNCHED and save the pid */ + /* set the proc state to LAUNCHED */ child->state = ORTE_PROC_STATE_LAUNCHED; - child->pid = pid; child->alive = true; } @@ -351,26 +344,18 @@ static int odls_default_fork_local_proc( * Launch all processes allocated to the current node. */ -int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data) +int orte_odls_default_launch_local_procs(opal_buffer_t *data) { int rc; orte_std_cntr_t total_slots_alloc, num_local_procs; orte_jobid_t job; orte_vpid_t range; - opal_list_item_t *item; bool node_included; bool override_oversubscribed; bool oversubscribed; - opal_list_t app_context_list; + orte_std_cntr_t i, num_contexts; + orte_app_context_t **app_contexts; - /* We need to create a list of the app_contexts - * so we can know what to launch - the process info only gives - * us an index into the app_context array, not the app_context - * info itself. 
- */ - - OBJ_CONSTRUCT(&app_context_list, opal_list_t); - /* construct the list of children we are to launch */ if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job, &num_local_procs, @@ -379,8 +364,11 @@ int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data) &node_included, &oversubscribed, &override_oversubscribed, - &app_context_list))) { - ORTE_ERROR_LOG(rc); + &num_contexts, + &app_contexts))) { + OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output, + "%s odls:default:launch:local failed to construct child list on error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); goto CLEANUP; } @@ -391,22 +379,25 @@ int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data) } /* launch the local procs */ - if (ORTE_SUCCESS != (rc = orte_odls_base_default_launch_local(job, &app_context_list, + if (ORTE_SUCCESS != (rc = orte_odls_base_default_launch_local(job, + num_contexts, app_contexts, num_local_procs, range, total_slots_alloc, oversubscribed, override_oversubscribed, odls_default_fork_local_proc))) { - ORTE_ERROR_LOG(rc); + OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output, + "%s odls:default:launch:local failed to launch on error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); goto CLEANUP; } CLEANUP: /* cleanup */ - while (NULL != (item = opal_list_remove_first(&app_context_list))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&app_context_list); + for (i=0; i < num_contexts; i++) { + if (NULL != app_contexts[i]) OBJ_RELEASE(app_contexts[i]); + }; + if (NULL != app_contexts) free(app_contexts); return rc; } diff --git a/orte/mca/odls/odls.h b/orte/mca/odls/odls.h index 2538d3ea3e..126262becc 100644 --- a/orte/mca/odls/odls.h +++ b/orte/mca/odls/odls.h @@ -27,15 +27,15 @@ #define ORTE_MCA_ODLS_H #include "orte_config.h" +#include "orte/types.h" #include "opal/mca/mca.h" #include "opal/class/opal_list.h" -#include "orte/dss/dss_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include 
"orte/mca/ns/ns_types.h" +#include "opal/dss/dss_types.h" #include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/rml/rml_types.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/odls/odls_types.h" @@ -47,21 +47,21 @@ extern "C" { #endif /* - * Construct a notify data object for use in adding local processes - * In order to reuse daemons, we need a way for the HNP to construct a notify_data object that + * Construct a buffer for use in adding local processes + * In order to reuse daemons, we need a way for the HNP to construct a buffer that * contains the data needed by the active ODLS component to launch a local process. Since the * only one that knows what a particular ODLS component needs is that component, we require an - * entry point that the HNP can call to get the required notify_data object. This is constructed + * entry point that the HNP can call to get the required buffer. This is constructed * for *all* nodes - the individual orteds then parse that data to find the specific launch info * for procs on their node */ -typedef int (*orte_odls_base_module_get_add_procs_data_fn_t)(orte_gpr_notify_data_t **data, - orte_job_map_t *map); +typedef int (*orte_odls_base_module_get_add_procs_data_fn_t)(opal_buffer_t *data, + orte_jobid_t job); /** * Locally launch the provided processes */ -typedef int (*orte_odls_base_module_launch_local_processes_fn_t)(orte_gpr_notify_data_t *data); +typedef int (*orte_odls_base_module_launch_local_processes_fn_t)(opal_buffer_t *data); /** * Kill the local processes on this node @@ -77,19 +77,13 @@ typedef int (*orte_odls_base_module_signal_local_process_fn_t)(const orte_proces /** * Deliver a message to local processes */ -typedef int (*orte_odls_base_module_deliver_message_fn_t)(orte_jobid_t job, orte_buffer_t *buffer, +typedef int (*orte_odls_base_module_deliver_message_fn_t)(orte_jobid_t job, opal_buffer_t *buffer, orte_rml_tag_t tag); -/** - * Extract the mapping of daemon-proc pair - */ -typedef int 
(*orte_odls_base_module_extract_proc_map_info_fn_t)(orte_process_name_t *daemon, - opal_list_t *proc_list, - orte_gpr_value_t *value); /** * Register to require sync before termination */ -typedef int (*orte_odls_base_module_require_sync_fn_t)(orte_process_name_t *proc); +typedef int (*orte_odls_base_module_require_sync_fn_t)(orte_process_name_t *proc, opal_buffer_t *buffer); /** * pls module version 1.3.0 @@ -100,7 +94,6 @@ struct orte_odls_base_module_1_3_0_t { orte_odls_base_module_kill_local_processes_fn_t kill_local_procs; orte_odls_base_module_signal_local_process_fn_t signal_local_procs; orte_odls_base_module_deliver_message_fn_t deliver_message; - orte_odls_base_module_extract_proc_map_info_fn_t extract_proc_map_info; orte_odls_base_module_require_sync_fn_t require_sync; }; diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index 92401cbcef..0dcf2fc876 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -21,41 +21,42 @@ #define ORTE_MCA_ODLS_TYPES_H #include "orte_config.h" -#include "orte/orte_types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /* define the orted command flag type */ typedef uint8_t orte_daemon_cmd_flag_t; -#define ORTE_DAEMON_CMD_T ORTE_UINT8 +#define ORTE_DAEMON_CMD_T OPAL_UINT8 /* * Definitions needed for communication */ -#define ORTE_DAEMON_HOSTFILE_CMD (orte_daemon_cmd_flag_t) 1 -#define ORTE_DAEMON_SCRIPTFILE_CMD (orte_daemon_cmd_flag_t) 2 -#define ORTE_DAEMON_CONTACT_QUERY_CMD (orte_daemon_cmd_flag_t) 3 -#define ORTE_DAEMON_KILL_LOCAL_PROCS (orte_daemon_cmd_flag_t) 4 -#define ORTE_DAEMON_SIGNAL_LOCAL_PROCS (orte_daemon_cmd_flag_t) 5 -#define ORTE_DAEMON_ADD_LOCAL_PROCS (orte_daemon_cmd_flag_t) 6 -#define ORTE_DAEMON_HEARTBEAT_CMD (orte_daemon_cmd_flag_t) 7 -#define ORTE_DAEMON_EXIT_CMD (orte_daemon_cmd_flag_t) 8 -#define ORTE_DAEMON_HALT_VM_CMD (orte_daemon_cmd_flag_t) 9 -#define ORTE_DAEMON_MESSAGE_LOCAL_PROCS (orte_daemon_cmd_flag_t) 10 -#define 
ORTE_DAEMON_ROUTE_NONE (orte_daemon_cmd_flag_t) 11 -#define ORTE_DAEMON_ROUTE_BINOMIAL (orte_daemon_cmd_flag_t) 12 -#define ORTE_DAEMON_WARMUP_LOCAL_CONN (orte_daemon_cmd_flag_t) 13 -#define ORTE_DAEMON_NULL_CMD (orte_daemon_cmd_flag_t) 14 -#define ORTE_DAEMON_SYNC_BY_PROC (orte_daemon_cmd_flag_t) 15 +#define ORTE_DAEMON_CONTACT_QUERY_CMD (orte_daemon_cmd_flag_t) 1 +#define ORTE_DAEMON_KILL_LOCAL_PROCS (orte_daemon_cmd_flag_t) 2 +#define ORTE_DAEMON_SIGNAL_LOCAL_PROCS (orte_daemon_cmd_flag_t) 3 +#define ORTE_DAEMON_ADD_LOCAL_PROCS (orte_daemon_cmd_flag_t) 4 +#define ORTE_DAEMON_HEARTBEAT_CMD (orte_daemon_cmd_flag_t) 5 +#define ORTE_DAEMON_EXIT_CMD (orte_daemon_cmd_flag_t) 6 +#define ORTE_DAEMON_PROCESS_AND_RELAY_CMD (orte_daemon_cmd_flag_t) 7 +#define ORTE_DAEMON_MESSAGE_LOCAL_PROCS (orte_daemon_cmd_flag_t) 8 +#define ORTE_DAEMON_ROUTE_NONE (orte_daemon_cmd_flag_t) 9 +#define ORTE_DAEMON_ROUTE_BINOMIAL (orte_daemon_cmd_flag_t) 10 +#define ORTE_DAEMON_WARMUP_LOCAL_CONN (orte_daemon_cmd_flag_t) 11 +#define ORTE_DAEMON_NULL_CMD (orte_daemon_cmd_flag_t) 12 +#define ORTE_DAEMON_SYNC_BY_PROC (orte_daemon_cmd_flag_t) 13 + +/* commands for use by tools */ +#define ORTE_DAEMON_REPORT_JOB_INFO_CMD (orte_daemon_cmd_flag_t) 14 +#define ORTE_DAEMON_REPORT_NODE_INFO_CMD (orte_daemon_cmd_flag_t) 15 +#define ORTE_DAEMON_REPORT_PROC_INFO_CMD (orte_daemon_cmd_flag_t) 16 +#define ORTE_DAEMON_ATTACH_STDOUT_CMD (orte_daemon_cmd_flag_t) 17 +#define ORTE_DAEMON_ATTACH_STDERR_CMD (orte_daemon_cmd_flag_t) 18 +#define ORTE_DAEMON_DETACH_STDOUT_CMD (orte_daemon_cmd_flag_t) 19 +#define ORTE_DAEMON_DETACH_STDERR_CMD (orte_daemon_cmd_flag_t) 20 +#define ORTE_DAEMON_SPAWN_JOB_CMD (orte_daemon_cmd_flag_t) 21 +#define ORTE_DAEMON_TERMINATE_JOB_CMD (orte_daemon_cmd_flag_t) 22 + +END_C_DECLS -/* define some useful attributes for dealing with orteds */ -#define ORTE_DAEMON_SOFT_KILL "orted-soft-kill" -#define ORTE_DAEMON_HARD_KILL "orted-hard-kill" - -#if defined(c_plusplus) || defined(__cplusplus) 
-} -#endif #endif diff --git a/orte/mca/odls/process/odls_process.h b/orte/mca/odls/process/odls_process.h index 30a1cf444c..5415e0e510 100755 --- a/orte/mca/odls/process/odls_process.h +++ b/orte/mca/odls/process/odls_process.h @@ -14,17 +14,11 @@ #include "orte_config.h" -#include "opal/threads/condition.h" #include "opal/mca/mca.h" -#include "orte/mca/rmgr/rmgr_types.h" - #include "orte/mca/odls/odls.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - +BEGIN_C_DECLS /* * Module open / close */ @@ -37,35 +31,12 @@ orte_odls_base_module_t* orte_odls_process_component_init(int *priority); */ int orte_odls_process_component_finalize(void); -/** - * ODLS Process globals - */ -typedef struct orte_odls_process_globals_t { - opal_mutex_t mutex; - opal_condition_t cond; - opal_list_t children; -} orte_odls_process_globals_t; - -extern orte_odls_process_globals_t orte_odls_process; - -/* - * List object to locally store app_contexts returned by the - * registry subscription. Since we don't know how many app_contexts will - * be returned, we need to store them on a list. 
- */ -typedef struct odls_process_app_context_t { - opal_list_item_t super; /* required to place this on a list */ - orte_app_context_t *app_context; -} odls_process_app_context_t; -OBJ_CLASS_DECLARATION(odls_process_app_context_t); - /* * ODLS Process module */ extern orte_odls_base_module_t orte_odls_process_module; ORTE_MODULE_DECLSPEC extern orte_odls_base_component_t mca_odls_process_component; -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS + #endif /* ORTE_ODLS_PROCESS_EXPORT_H */ diff --git a/orte/mca/odls/process/odls_process_component.c b/orte/mca/odls/process/odls_process_component.c index 9e5564b1df..e39ffe7b1e 100755 --- a/orte/mca/odls/process/odls_process_component.c +++ b/orte/mca/odls/process/odls_process_component.c @@ -11,6 +11,8 @@ */ #include "orte_config.h" +#include "orte/constants.h" + #include #ifdef HAVE_UNISTD_H #include @@ -19,21 +21,13 @@ #include "opal/util/argv.h" #include "opal/util/path.h" #include "opal/mca/base/mca_base_param.h" + #include "orte/util/proc_info.h" -#include "orte/orte_constants.h" + #include "orte/mca/odls/odls.h" +#include "orte/mca/odls/base/odls_private.h" #include "orte/mca/odls/process/odls_process.h" -/* Instantiate the component globals */ -orte_odls_process_globals_t orte_odls_process; - - -/* instance the app_context list object */ -OBJ_CLASS_INSTANCE(odls_process_app_context_t, - opal_list_item_t, - NULL, NULL); - - /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it @@ -74,21 +68,8 @@ orte_odls_base_component_t mca_odls_process_component = { orte_odls_process_component_finalize }; -bool orte_odls_process_debug = false; - int orte_odls_process_component_open(void) { - int tmp; - /* initialize globals */ - OBJ_CONSTRUCT(&orte_odls_process.mutex, opal_mutex_t); - OBJ_CONSTRUCT(&orte_odls_process.cond, opal_condition_t); - OBJ_CONSTRUCT(&orte_odls_process.children, opal_list_t); - - /* lookup parameters */ - 
mca_base_param_reg_int( &mca_odls_process_component.version, "debug", - "Whether or not to enable debugging output for the process odls component (0 or 1)", - false, false, false, &tmp); - orte_odls_process_debug = OPAL_INT_TO_BOOL(tmp); return ORTE_SUCCESS; } @@ -105,9 +86,6 @@ orte_odls_base_module_t *orte_odls_process_component_init(int *priority) int orte_odls_process_component_close(void) { - OBJ_DESTRUCT(&orte_odls_process.mutex); - OBJ_DESTRUCT(&orte_odls_process.cond); - OBJ_DESTRUCT(&orte_odls_process.children); return ORTE_SUCCESS; } @@ -116,7 +94,7 @@ int orte_odls_process_component_finalize(void) opal_list_item_t *item; /* cleanup state */ - while (NULL != (item = opal_list_remove_first(&orte_odls_process.children))) { + while (NULL != (item = opal_list_remove_first(&orte_odls_globals.children))) { OBJ_RELEASE(item); } diff --git a/orte/mca/odls/process/odls_process_module.c b/orte/mca/odls/process/odls_process_module.c index 1096261732..a6c1459086 100755 --- a/orte/mca/odls/process/odls_process_module.c +++ b/orte/mca/odls/process/odls_process_module.c @@ -14,6 +14,7 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include #ifdef HAVE_UNISTD_H @@ -32,187 +33,24 @@ #include #endif /* HAVE_FCNTL_H */ -#include "opal/event/event.h" -#include "opal/util/argv.h" #include "opal/util/output.h" -#include "opal/util/os_path.h" #include "opal/util/show_help.h" -#include "opal/util/path.h" -#include "opal/util/basename.h" -#include "opal/util/opal_environ.h" -#include "opal/mca/base/mca_base_param.h" -#include "opal/util/num_procs.h" #include "opal/util/sys_limits.h" -#include "orte/dss/dss.h" -#include "orte/util/sys_info.h" -#include "orte/util/univ_info.h" -#include "orte/util/session_dir.h" #include "orte/runtime/orte_wait.h" -#include "orte/runtime/params.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/iof/iof.h" #include 
"orte/mca/iof/base/iof_base_setup.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/sds/base/base.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/smr/smr.h" +#include "orte/util/name_fns.h" #include "orte/mca/odls/base/odls_private.h" #include "orte/mca/odls/process/odls_process.h" static void set_handler_default(int sig); -static int orte_odls_process_get_add_procs_data(orte_gpr_notify_data_t **data, - orte_job_map_t *map) -{ - orte_gpr_notify_data_t *ndat; - orte_gpr_value_t **values, *value; - orte_std_cntr_t cnt; - char *glob_tokens[] = { - ORTE_JOB_GLOBALS, - NULL - }; - char *glob_keys[] = { - ORTE_JOB_APP_CONTEXT_KEY, - ORTE_JOB_VPID_START_KEY, - ORTE_JOB_VPID_RANGE_KEY, - ORTE_JOB_TOTAL_SLOTS_ALLOC_KEY, - NULL - }; - opal_list_item_t *item, *m_item; - orte_mapped_node_t *node; - orte_mapped_proc_t *proc; - int rc; - char *segment; - - /* set default answer */ - *data = NULL; - - ndat = OBJ_NEW(orte_gpr_notify_data_t); - if (NULL == ndat) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* construct a fake trigger name so that the we can extract the jobid from it later */ - if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(ndat->target), "bogus", map->job))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* get the segment name */ - if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, map->job))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* get the info from the job globals container first */ - if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_TOKENS_AND | ORTE_GPR_KEYS_OR, - segment, glob_tokens, glob_keys, &cnt, &values))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - return rc; - } - - /* there can only be one value here since we only specified a single container. 
- * Just transfer the returned value to the ndat structure - */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&cnt, ndat->values, values[0]))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(values[0]); - return rc; - } - ndat->cnt = 1; - - /* the remainder of our required info is in the mapped_node objects, so all we - * have to do is transfer it over - */ - for (m_item = opal_list_get_first(&map->nodes); - m_item != opal_list_get_end(&map->nodes); - m_item = opal_list_get_next(m_item)) { - node = (orte_mapped_node_t*)m_item; - - for (item = opal_list_get_first(&node->procs); - item != opal_list_get_end(&node->procs); - item = opal_list_get_next(item)) { - proc = (orte_mapped_proc_t*)item; - - /* must not have any tokens so that launch_procs can process it correctly */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, 0, segment, 3, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), - ORTE_PROC_NAME_KEY, - ORTE_NAME, &proc->name))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), - ORTE_PROC_APP_CONTEXT_KEY, - ORTE_STD_CNTR, &proc->app_idx))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), - ORTE_NODE_NAME_KEY, - ORTE_STRING, node->nodename))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), - ORTE_PROC_LOCAL_RANK_KEY, - ORTE_VPID, &proc->local_rank))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), - ORTE_NODE_NUM_PROCS_KEY, - ORTE_STD_CNTR, &node->num_procs))) { - ORTE_ERROR_LOG(rc); - 
OBJ_RELEASE(ndat); - OBJ_RELEASE(value); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&cnt, ndat->values, value))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ndat); - OBJ_RELEASE(values[0]); - return rc; - } - ndat->cnt += 1; - } - } - - *data = ndat; - return ORTE_SUCCESS; -} - -static bool orte_odls_process_child_died( pid_t pid, unsigned int timeout, - int* exit_status ) +static bool odls_process_child_died( pid_t pid, unsigned int timeout, + int* exit_status ) { int error; HANDLE handle = OpenProcess( PROCESS_TERMINATE | SYNCHRONIZE, FALSE, @@ -227,7 +65,7 @@ static bool orte_odls_process_child_died( pid_t pid, unsigned int timeout, return false; } -static int orte_odls_process_kill_local( pid_t pid, int sig_num ) +static int odls_process_kill_local( pid_t pid, int sig_num ) { if( false == TerminateProcess( (HANDLE)pid, 1 ) ) { return (int)GetLastError(); @@ -235,295 +73,33 @@ static int orte_odls_process_kill_local( pid_t pid, int sig_num ) return 0; } -static int orte_odls_process_kill_local_procs(orte_jobid_t job, bool set_state) +static int odls_process_kill_local_procs(orte_jobid_t job, bool set_state) { - orte_odls_child_t *child; - opal_list_item_t *item, *next; - int rc, exit_status = -1; - opal_list_t procs_killed; - orte_namelist_t *proc; - - OBJ_CONSTRUCT(&procs_killed, opal_list_t); - - opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: working on job %ld", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)job); - - /* since we are going to be working with the global list of - * children, we need to protect that list from modification - * by other threads - */ - OPAL_THREAD_LOCK(&orte_odls_process.mutex); - - for (item = opal_list_get_first(&orte_odls_process.children); - item != opal_list_get_end(&orte_odls_process.children); - item = next) { - child = (orte_odls_child_t*)item; - - /* preserve the pointer to the next item in list in case we release it */ - next = opal_list_get_next(item); - - 
opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: checking child process %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)); - - /* do we have a child from the specified job? Because the - * job could be given as a WILDCARD value, we must use - * the dss.compare function to check for equality. - */ - if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { - continue; - } - - /* remove the child from the list since it is either already dead or soon going to be dead */ - opal_list_remove_item(&orte_odls_process.children, item); - - /* is this process alive? if not, then nothing for us - * to do to it - */ - if (!child->alive) { - opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: child %s is not alive", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)); - /* ensure, though, that the state is terminated so we don't lockup if - * the proc never started - */ - goto MOVEON; - } - - /* de-register the SIGCHILD callback for this pid */ - if (ORTE_SUCCESS != (rc = orte_wait_cb_cancel(child->pid))) { - /* no need to error_log this - it just means that the pid is already gone */ - goto MOVEON; - } - - /* Send a sigterm to the process. */ - if (0 != orte_odls_process_kill_local(child->pid, SIGTERM)) { - int err = GetLastError(); - opal_show_help("help-odls-default.txt", - "odls-default:could-not-send-kill", - true, orte_system_info.nodename, child->pid, err); - goto MOVEON; - } - - /* The kill succeeded. Wait up to timeout_before_sigkill - seconds to see if it died. 
*/ - - if (!orte_odls_process_child_died(child->pid, orte_odls_globals.timeout_before_sigkill, &exit_status)) { - /* try killing it again */ - orte_odls_process_kill_local(child->pid, SIGKILL); - /* Double check that it actually died this time */ - if (!orte_odls_process_child_died(child->pid, orte_odls_globals.timeout_before_sigkill, &exit_status)) { - opal_show_help("help-odls-default.txt", - "odls-default:could-not-kill", - true, orte_system_info.nodename, child->pid); - } - } - -MOVEON: - /* set the process to "not alive" */ - child->alive = false; - - /* add this proc to the local list */ - proc = OBJ_NEW(orte_namelist_t); - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(proc->name), child->name, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - return rc; - } - opal_list_append(&procs_killed, &proc->item); - - /* release the object since we killed it */ - OBJ_RELEASE(child); - } - - /* we are done with the global list, so we can now release - * any waiting threads - this also allows any callbacks to work - */ - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - - /* deconstruct the local list and update the process states on the registry, if indicated */ - while (NULL != (item = opal_list_remove_first(&procs_killed))) { - proc = (orte_namelist_t*)item; - if (set_state) { - if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(proc->name, ORTE_PROC_STATE_TERMINATED, exit_status))) { - ORTE_ERROR_LOG(rc); - /* don't exit out even if this didn't work - we still might need to kill more - * processes, so just keep trucking - */ - } - } - OBJ_RELEASE(proc); - } - - OBJ_DESTRUCT(&procs_killed); - - return ORTE_SUCCESS; -} - -/* - * Wait for a callback indicating the child has completed. 
- */ -static void odls_process_wait_local_proc(pid_t pid, int status, void* cbdata) -{ - orte_odls_child_t *child; - opal_list_item_t *item; - bool aborted; - char *job, *vpid, *abort_file; - struct _stat buf; int rc; - - opal_output(orte_odls_globals.output, "odls: child process terminated"); - - /* since we are going to be working with the global list of - * children, we need to protect that list from modification - * by other threads. This will also be used to protect us - * from race conditions on any abort situation - */ - OPAL_THREAD_LOCK(&orte_odls_process.mutex); - - /* find this child */ - for (item = opal_list_get_first(&orte_odls_process.children); - item != opal_list_get_end(&orte_odls_process.children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - if (child->alive && pid == child->pid) { /* found it */ - goto GOTCHILD; - } - } - /* get here if we didn't find the child, or if the specified child is already - * dead. If the latter, then we have a problem as it means we are detecting - * it exiting multiple times - */ - opal_output(orte_odls_globals.output, "odls: did not find pid %ld in table!", (long) pid); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - return; - -GOTCHILD: - /* If this child was the (vpid==0), we hooked it up to orterun's - STDIN SOURCE earlier (do not change this without also changing - odsl_process_fork_local_proc()). So we have to tell the SOURCE - a) that we don't want any more data and b) that it should not - expect any more ACKs from this endpoint (so that the svc - component can still flush/shut down cleanly). 
- - Note that the source may have already detected that this - process died as part of an OOB/RML exception, but that's ok -- - its "exception" detection capabilities are not reliable, so we - *have* to do this unpublish here, even if it arrives after an - exception is detected and handled (in which case this unpublish - request will be ignored/discarded. */ - opal_output(orte_odls_globals.output, - "odls: pid %ld corresponds to %s\n", - (long) pid, ORTE_NAME_PRINT(child->name)); -#if 0 - if (0 == child->name->vpid) { - rc = orte_iof.iof_unpublish(child->name, ORTE_NS_CMP_ALL, - ORTE_IOF_STDIN); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - /* We can't really abort, so keep going... */ - } - } - opal_output(orte_odls_globals.output, "orted sent IOF unpub message!\n"); -#endif - /* determine the state of this process */ - aborted = false; - if(WIFEXITED(status)) { - /* even though the process exited "normally", it is quite - * possible that this happened via an orte_abort call - in - * which case, we need to indicate this was an "abnormal" - * termination. See the note in "orte_abort.c" for - * an explanation of this process. - * - * For our purposes here, we need to check for the existence - * of an "abort" file in this process' session directory. If - * we find it, then we know that this was an abnormal termination. - */ - if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&job, child->name->jobid))) { - ORTE_ERROR_LOG(rc); - goto MOVEON; - } - if (ORTE_SUCCESS != (rc = orte_ns.convert_vpid_to_string(&vpid, child->name->vpid))) { - ORTE_ERROR_LOG(rc); - free(job); - goto MOVEON; - } - abort_file = opal_os_path(false, orte_process_info.universe_session_dir, - job, vpid, "abort", NULL ); - free(job); - free(vpid); - if (0 == _stat(abort_file, &buf)) { - /* the abort file must exist - there is nothing in it we need. 
It's - * meer existence indicates that an abnormal termination occurred - */ - opal_output(orte_odls_globals.output, "odls: child %s died by abort", - ORTE_NAME_PRINT(child->name)); - aborted = true; - free(abort_file); - } else { - opal_output(orte_odls_globals.output, "odls: child process %s terminated normally", - ORTE_NAME_PRINT(child->name)); - } - } else { - /* the process was terminated with a signal! That's definitely - * abnormal, so indicate that condition - */ - opal_output(orte_odls_globals.output, "odls: child process %s terminated with signal", - ORTE_NAME_PRINT(child->name)); - aborted = true; - } - -MOVEON: - /* set this proc to "not alive" */ - child->alive = false; - - /* Clean up the session directory as if we were the process - * itself. This covers the case where the process died abnormally - * and didn't cleanup its own session directory. - */ - orte_session_dir_finalize(child->name); - - /* set the proc state in the child structure */ - if (aborted) { - child->state = ORTE_PROC_STATE_ABORTED; - } else { - child->state = ORTE_PROC_STATE_TERMINATED; - } - - /* Need to unlock before we call set_proc_state as this is going to generate - * a trigger that will eventually callback to us - */ - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - - if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(child->name, child->state, status))) { + + if (ORTE_SUCCESS != (rc = orte_odls_base_default_kill_local_procs(job, set_state, + odls_process_kill_local, odls_process_child_died))) { ORTE_ERROR_LOG(rc); + return rc; } + return ORTE_SUCCESS; + } + /** * Fork/exec the specified processes */ -static int orte_odls_process_fork_local_proc( +static int odls_process_fork_local_proc( orte_app_context_t* context, orte_odls_child_t *child, - orte_vpid_t vpid_start, - orte_vpid_t vpid_range, - orte_std_cntr_t total_slots_alloc, - bool want_processor, - size_t processor, - bool oversubscribed) + char **environ_copy) { pid_t 
pid; orte_iof_base_io_conf_t opts; int rc; int i = 0; - char** environ_copy; - char *param, *param2; - char *uri; /* check the system limits - if we are at our max allowed children, then * we won't be allowed to do this anyway, so we may as well abort now. @@ -532,7 +108,7 @@ static int orte_odls_process_fork_local_proc( */ if (opal_sys_limits.initialized) { if (0 < opal_sys_limits.num_procs && - opal_sys_limits.num_procs <= (int)opal_list_get_size(&orte_odls_process.children)) { + opal_sys_limits.num_procs <= (int)opal_list_get_size(&orte_odls_globals.children)) { /* at the system limit - abort */ ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN); child->state = ORTE_PROC_STATE_FAILED_TO_START; @@ -562,159 +138,6 @@ static int orte_odls_process_fork_local_proc( return rc; } - /* Try to change to the context cwd and check that the app - exists and is executable. The RMGR functions will print - out a pretty error message if either of these operations fails - */ - if (ORTE_SUCCESS != (i = orte_rmgr.check_context_cwd(context, true))) { - return ORTE_ERR_FATAL; - } - if (ORTE_SUCCESS != (i = orte_rmgr.check_context_app(context))) { - return ORTE_ERR_FATAL; - } - - /* setup base environment: copy the current environ and merge - in the app context environ */ - if (NULL != context->env) { - environ_copy = opal_environ_merge(orte_launch_environ, context->env); - } else { - environ_copy = opal_argv_copy(orte_launch_environ); - } - - /* special case handling for --prefix: this is somewhat icky, - but at least some users do this. :-\ It is possible that - when using --prefix, the user will also "-x PATH" and/or - "-x LD_LIBRARY_PATH", which would therefore clobber the - work that was done in the prior odls to ensure that we have - the prefix at the beginning of the PATH and - LD_LIBRARY_PATH. So examine the context->env and see if we - find PATH or LD_LIBRARY_PATH. If found, that means the - prior work was clobbered, and we need to re-prefix those - variables. 
*/ - for (i = 0; NULL != context->env && NULL != context->env[i]; ++i) { - char *newenv; - - /* Reset PATH */ - if (0 == strncmp("PATH=", context->env[i], 5)) { - asprintf(&newenv, "%s\\bin;%s", - context->prefix_dir, context->env[i] + 5); - opal_setenv("PATH", newenv, true, &environ_copy); - free(newenv); - } - - /* Reset LD_LIBRARY_PATH */ - else if (0 == strncmp("LD_LIBRARY_PATH=", context->env[i], 16)) { - asprintf(&newenv, "%s/lib:%s", - context->prefix_dir, context->env[i] + 16); - opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ_copy); - free(newenv); - } - } - - param = mca_base_param_environ_variable("rmgr","bootproxy","jobid"); - opal_unsetenv(param, &environ_copy); - free(param); - - /* pass my contact info to the local proc so we can talk */ - uri = orte_rml.get_contact_info(); - param = mca_base_param_environ_variable("orte","local_daemon","uri"); - opal_setenv(param, uri, true, &environ_copy); - free(param); - free(uri); - - /* setup yield schedule and processor affinity - * We default here to always setting the affinity processor if we want - * it. The processor affinity system then determines - * if processor affinity is enabled/requested - if so, it then uses - * this value to select the process to which the proc is "assigned". 
- * Otherwise, the paffinity subsystem just ignores this value anyway - */ - if (oversubscribed) { - param = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle"); - opal_setenv(param, "1", false, &environ_copy); - } else { - param = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle"); - opal_setenv(param, "0", false, &environ_copy); - } - free(param); - - if (want_processor) { - param = mca_base_param_environ_variable("mpi", NULL, - "paffinity_processor"); - asprintf(¶m2, "%lu", (unsigned long) processor); - opal_setenv(param, param2, true, &environ_copy); - free(param); - free(param2); - } else { - param = mca_base_param_environ_variable("mpi", NULL, - "paffinity_processor"); - opal_unsetenv(param, &environ_copy); - free(param); - } - - /* setup universe info */ - if (NULL != orte_universe_info.name) { - param = mca_base_param_environ_variable("universe", NULL, NULL); - asprintf(&uri, "%s@%s:%s", orte_universe_info.uid, - orte_universe_info.host, - orte_universe_info.name); - opal_setenv(param, uri, true, &environ_copy); - free(param); - free(uri); - } - - /* setup ns contact info */ - if(NULL != orte_process_info.ns_replica_uri) { - uri = strdup(orte_process_info.ns_replica_uri); - } else { - uri = orte_rml.get_contact_info(); - } - param = mca_base_param_environ_variable("ns","replica","uri"); - opal_setenv(param, uri, true, &environ_copy); - free(param); - free(uri); - - /* setup gpr contact info */ - if(NULL != orte_process_info.gpr_replica_uri) { - uri = strdup(orte_process_info.gpr_replica_uri); - } else { - uri = orte_rml.get_contact_info(); - } - param = mca_base_param_environ_variable("gpr","replica","uri"); - opal_setenv(param, uri, true, &environ_copy); - free(param); - free(uri); - - /* set the app_context number into the environment */ - param = mca_base_param_environ_variable("orte","app","num"); - asprintf(¶m2, "%ld", (long)child->app_idx); - opal_setenv(param, param2, true, &environ_copy); - free(param); - free(param2); - - 
/* set the universe size in the environment */ - param = mca_base_param_environ_variable("orte","universe","size"); - asprintf(¶m2, "%ld", (long)total_slots_alloc); - opal_setenv(param, param2, true, &environ_copy); - free(param); - free(param2); - - - /* use same nodename as the starting daemon (us) */ - param = mca_base_param_environ_variable("orte", "base", "nodename"); - opal_setenv(param, orte_system_info.nodename, true, &environ_copy); - free(param); - - /* push name into environment */ - orte_ns_nds_env_put(child->name, vpid_start, vpid_range, - child->local_rank, child->num_procs, - &environ_copy); - - if (context->argv == NULL) { - context->argv = (char**)malloc(sizeof(char*)*2); - context->argv[0] = strdup(context->app); - context->argv[1] = NULL; - } #if 0 /* connect endpoints IOF */ rc = orte_iof_base_setup_parent(child->name, &opts); @@ -734,7 +157,7 @@ static int orte_odls_process_fork_local_proc( child->exit_code = ORTE_ERR_PIPE_READ_FAILURE; opal_show_help("help-orted-launcher.txt", "orted-launcher:execv-error", true, context->app, "TODO: some error"); - orte_smr.set_proc_state(child->name, ORTE_PROC_STATE_ABORTED, -1); + child->state = ORTE_PROC_STATE_ABORTED; return ORTE_ERROR; } pid = handle; @@ -753,411 +176,62 @@ static int orte_odls_process_fork_local_proc( * Launch all processes allocated to the current node. 
*/ -static int orte_odls_process_launch_local_procs(orte_gpr_notify_data_t *data) +static int odls_process_launch_local_procs(opal_buffer_t *data) { int rc; - orte_std_cntr_t i, j, kv, kv2, *sptr, total_slots_alloc; - orte_gpr_value_t *value, **values; - orte_gpr_keyval_t *kval; - orte_app_context_t *app; + orte_std_cntr_t total_slots_alloc, num_local_procs; orte_jobid_t job; - orte_vpid_t *vptr, start, range; - char *node_name; - opal_list_t app_context_list; - orte_odls_child_t *child; - odls_process_app_context_t *app_item; - size_t num_processors; - bool oversubscribed=false, want_processor, *bptr, override_oversubscribed=false; - opal_list_item_t *item, *item2; - bool quit_flag; + orte_vpid_t range; bool node_included; - char *job_str, *uri_file, *my_uri, *session_dir=NULL; - FILE *fp; + bool override_oversubscribed; + bool oversubscribed; + orte_std_cntr_t i, num_contexts; + orte_app_context_t **app_contexts; - /* parse the returned data to create the required structures - * for a fork launch. Since the data will contain information - * on procs for ALL nodes, we first have to find the value - * struct that contains info for our node. 
- */ - - /* first, retrieve the job number we are to launch from the - * returned data - we can extract the jobid directly from the - * subscription name we created - */ - if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, data->target))) { - ORTE_ERROR_LOG(rc); - return rc; + /* construct the list of children we are to launch */ + if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job, + &num_local_procs, + &range, + &total_slots_alloc, + &node_included, + &oversubscribed, + &override_oversubscribed, + &num_contexts, + &app_contexts))) { + OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output, + "%s odls:process:launch:local failed to construct child list on error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); + goto CLEANUP; } - - opal_output(orte_odls_globals.output, "odls: setting up launch for job %ld", (long)job); - - /* We need to create a list of the app_contexts - * so we can know what to launch - the process info only gives - * us an index into the app_context array, not the app_context - * info itself. - */ - OBJ_CONSTRUCT(&app_context_list, opal_list_t); - - /* set the default values to INVALID */ - start = ORTE_VPID_INVALID; - range = ORTE_VPID_INVALID; - - /* set the flag indicating this node is not included in the launch data */ - node_included = false; - - values = (orte_gpr_value_t**)(data->values)->addr; - for (j=0, i=0; i < data->cnt && j < (data->values)->size; j++) { /* loop through all returned values */ - if (NULL != values[j]) { - i++; - value = values[j]; - - if (NULL != value->tokens) { - /* this came from the globals container, so it must contain - * the app_context(s), vpid_start, and vpid_range entries. 
Only one - * value object should ever come from that container - */ - for (kv=0; kv < value->cnt; kv++) { - kval = value->keyvals[kv]; - if (strcmp(kval->key, ORTE_JOB_VPID_START_KEY) == 0) { - /* this can only occur once, so just store it */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, kval->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - start = *vptr; - continue; - } - if (strcmp(kval->key, ORTE_JOB_VPID_RANGE_KEY) == 0) { - /* this can only occur once, so just store it */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, kval->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - range = *vptr; - continue; - } - if (strcmp(kval->key, ORTE_JOB_APP_CONTEXT_KEY) == 0) { - /* this can occur multiple times since we allow multiple - * app_contexts on the orterun command line. Add them - * to the list - */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&app, kval->value, ORTE_APP_CONTEXT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - app_item = OBJ_NEW(odls_process_app_context_t); - if (NULL == app_item) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - app_item->app_context = app; - opal_list_append(&app_context_list, &app_item->super); - kval->value->data = NULL; /* protect the data storage from later release */ - } - if (strcmp(kval->key, ORTE_JOB_OVERSUBSCRIBE_OVERRIDE_KEY) == 0) { - /* this can only occur once, so just store it */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, kval->value, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - override_oversubscribed = *bptr; - continue; - } - if (strcmp(kval->key, ORTE_JOB_TOTAL_SLOTS_ALLOC_KEY) == 0) { - /* this can only occur once, so just store it */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - total_slots_alloc = *sptr; - continue; - } - } /* end for loop to process global data */ - } else { - /* this must have come from one of the 
process containers, so it must - * contain data for a proc structure - see if it - * belongs to this node - */ - for (kv=0; kv < value->cnt; kv++) { - kval = value->keyvals[kv]; - if (strcmp(kval->key, ORTE_NODE_NAME_KEY) == 0) { - /* Most C-compilers will bark if we try to directly compare the string in the - * kval data area against a regular string, so we need to "get" the data - * so we can access it */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&node_name, kval->value, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* if this is our node...must also protect against a zero-length string */ - if (NULL != node_name && 0 == strcmp(node_name, orte_system_info.nodename)) { - /* indicate that there is something for us to do */ - node_included = true; - - /* ...harvest the info into a new child structure */ - child = OBJ_NEW(orte_odls_child_t); - for (kv2 = 0; kv2 < value->cnt; kv2++) { - kval = value->keyvals[kv2]; - if(strcmp(kval->key, ORTE_PROC_NAME_KEY) == 0) { - /* copy the name into the child object */ - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(child->name), kval->value->data, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; - } - continue; - } - if(strcmp(kval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - child->app_idx = *sptr; /* save the index into the app_context objects */ - continue; - } - if(strcmp(kval->key, ORTE_PROC_LOCAL_RANK_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, kval->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - child->local_rank = *vptr; /* save the local_rank */ - continue; - } - if(strcmp(kval->key, ORTE_NODE_NUM_PROCS_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - child->num_procs = *sptr; /* save the number of procs from this job on this node */ - 
continue; - } - if(strcmp(kval->key, ORTE_NODE_OVERSUBSCRIBED_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, kval->value, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - oversubscribed = *bptr; - continue; - } - } /* kv2 */ - /* protect operation on the global list of children */ - OPAL_THREAD_LOCK(&orte_odls_process.mutex); - opal_list_append(&orte_odls_process.children, &child->super); - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - - } - } - } /* for kv */ - } - } /* for j */ - } - /* if there is nothing for us to do, just return */ if (!node_included) { - return ORTE_SUCCESS; + rc = ORTE_SUCCESS; + goto CLEANUP; } - /* record my uri in a file within the session directory so the local proc - * can contact me - */ - opal_output(orte_odls_globals.output, "odls: dropping local uri file"); - - /* put the file in the job session dir for the job being launched */ - orte_ns.convert_jobid_to_string(&job_str, job); - if (ORTE_SUCCESS != (rc = orte_session_dir(true, NULL, NULL, NULL, - NULL, NULL, job_str, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; + /* launch the local procs */ + if (ORTE_SUCCESS != (rc = orte_odls_base_default_launch_local(job, + num_contexts, app_contexts, + num_local_procs, + range, total_slots_alloc, + oversubscribed, + override_oversubscribed, + odls_process_fork_local_proc))) { + OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output, + "%s odls:process:launch:local failed to launch on error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); + goto CLEANUP; } - /* get the session dir name so we can put the file there */ - if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&session_dir, NULL, NULL, NULL, - NULL, NULL, NULL, job_str, NULL))) { - ORTE_ERROR_LOG(rc); - free(job_str); - return rc; - } - free(job_str); - - /* create the file and put my uri into it */ - uri_file = opal_os_path(false, session_dir, "orted-uri.txt", NULL); - fp = fopen(uri_file, 
"w"); - if (NULL == fp) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - return ORTE_ERR_FILE_OPEN_FAILURE; - } - my_uri = orte_rml.get_contact_info(); - fprintf(fp, "%s\n", my_uri); - fclose(fp); - free(uri_file); - free(my_uri); - - /* Now we preload any files that are needed. This is done on a per - * app context basis */ - for (item = opal_list_get_first(&app_context_list); - item != opal_list_get_end(&app_context_list); - item = opal_list_get_next(item)) { - app_item = (odls_process_app_context_t*)item; - if(app_item->app_context->preload_binary || - NULL != app_item->app_context->preload_files) { - if( ORTE_SUCCESS != (rc = orte_odls_base_preload_files_app_context(app_item->app_context)) ) { - ORTE_ERROR_LOG(rc); - } - } - } - - /* setup for processor affinity. If there are enough physical processors on this node, then - * we indicate which processor each process should be assigned to, IFF the user has requested - * processor affinity be used - the paffinity subsystem will make that final determination. All - * we do here is indicate that we should do the definitions just in case paffinity is active - */ - if (OPAL_SUCCESS != opal_get_num_processors( (int *) &num_processors)) { - /* if we cannot find the number of local processors, then default to conservative - * settings - */ - want_processor = false; /* default to not being a hog */ - opal_output(orte_odls_globals.output, - "odls: could not get number of processors - using conservative settings"); - } else { - opal_output(orte_odls_globals.output, - "odls: got %ld processors", (long)num_processors); - - /* only do this if we can actually get info on the number of processors */ - if (opal_list_get_size(&orte_odls_process.children) > (size_t)num_processors) { - want_processor = false; - } else { - want_processor = true; - } - - /* now let's deal with the oversubscribed flag - and the use-case where a hostfile or some - * other non-guaranteed-accurate method was used to inform us about our allocation. 
Since - * the information on the number of slots on this node could have been incorrect, we need - * to check it against the local number of processors to ensure we don't overload them - */ - if (override_oversubscribed) { - opal_output(orte_odls_globals.output, "odls: overriding oversubscription"); - if (opal_list_get_size(&orte_odls_process.children) > (size_t)num_processors) { - /* if the #procs > #processors, declare us oversubscribed regardless - * of what the mapper claimed - the user may have told us something - * incorrect - */ - oversubscribed = true; - } else { - /* likewise, if there are more processors here than we were told, - * declare us to not be oversubscribed so we can be aggressive. This - * covers the case where the user didn't tell us anything about the - * number of available slots, so we defaulted to a value of 1 - */ - oversubscribed = false; - } - } - } - opal_output(orte_odls_globals.output, "odls: oversubscribed set to %s want_processor set to %s", - oversubscribed ? "true" : "false", want_processor ? "true" : "false"); - - /* okay, now let's launch our local procs using a fork/exec */ - i = 0; - /* protect operations involving the global list of children */ - OPAL_THREAD_LOCK(&orte_odls_process.mutex); - - quit_flag = false; - for( item = opal_list_get_first(&orte_odls_process.children); - ((item != opal_list_get_end(&orte_odls_process.children)) && (false == quit_flag)); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - - /* is this child already alive? This can happen if - * we are asked to launch additional processes. - * If it has been launched, then do nothing - */ - if (child->alive) { - opal_output(orte_odls_globals.output, "odls: child %s is already alive", - ORTE_NAME_PRINT(child->name)); - continue; - } - - /* do we have a child from the specified job. Because the - * job could be given as a WILDCARD value, we must use - * the dss.compare function to check for equality. 
- */ - if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { - opal_output(orte_odls_globals.output, "odls: child %s is not in job %ld being launched", - ORTE_NAME_PRINT(child->name), (long)job); - continue; - } - - opal_output(orte_odls_globals.output, "odls: preparing to launch child %s", - ORTE_NAME_PRINT(child->name)); - - /* find the indicated app_context in the list */ - for (item2 = opal_list_get_first(&app_context_list); - item2 != opal_list_get_end(&app_context_list); - item2 = opal_list_get_next(item2)) { - app_item = (odls_process_app_context_t*)item2; - if (child->app_idx == app_item->app_context->idx) { - app = app_item->app_context; - goto DOFORK; - } - } - /* get here if we couldn't find the app_context */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - return ORTE_ERR_NOT_FOUND; - -DOFORK: - /* must unlock prior to fork to keep things clean in the - * event library - */ - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - - if (ORTE_SUCCESS != (rc = orte_odls_process_fork_local_proc(app, child, start, - range, total_slots_alloc, - want_processor, - i, oversubscribed))) { - /* do NOT ERROR_LOG this error - it generates - * a message/node as most errors will be common - * across the entire cluster. Instead, we let orterun - * output a consolidated error message for us - */ - child->state = ORTE_PROC_STATE_FAILED_TO_START; - quit_flag = true; - } - /* reaquire lock so we don't double unlock... 
*/ - OPAL_THREAD_LOCK(&orte_odls_process.mutex); - i++; - } - - /* report the proc info and state in the registry */ - if (ORTE_SUCCESS != (rc = orte_odls_base_report_spawn(&orte_odls_process.children))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* setup the waitpids on the children */ - for (item = opal_list_get_first(&orte_odls_process.children); - item != opal_list_get_end(&orte_odls_process.children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - - if (ORTE_PROC_STATE_LAUNCHED == child->state) { - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - orte_wait_cb(child->pid, odls_process_wait_local_proc, NULL); - OPAL_THREAD_LOCK(&orte_odls_process.mutex); - child->state = ORTE_PROC_STATE_RUNNING; - } - } - +CLEANUP: /* cleanup */ - while (NULL != (item = opal_list_remove_first(&app_context_list))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&app_context_list); - - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - return ORTE_SUCCESS; + for (i=0; i < num_contexts; i++) { + if (NULL != app_contexts[i]) OBJ_RELEASE(app_contexts[i]); + }; + if (NULL != app_contexts) free(app_contexts); + + return rc; } static int send_signal(pid_t pid, int signal) @@ -1165,199 +239,22 @@ static int send_signal(pid_t pid, int signal) return ORTE_ERROR; } -static int orte_odls_process_signal_local_proc(const orte_process_name_t *proc, int32_t signal) +static int odls_process_signal_local_proc(const orte_process_name_t *proc, int32_t signal) { int rc; - opal_list_item_t *item; - orte_odls_child_t *child; - - /* protect operations involving the global list of children */ - OPAL_THREAD_LOCK(&orte_odls_process.mutex); - - /* if procs is NULL, then we want to signal all - * of the local procs, so just do that case - */ - if (NULL == proc) { - rc = ORTE_SUCCESS; /* pre-set this as an empty list causes us to drop to bottom */ - for (item = opal_list_get_first(&orte_odls_process.children); - item != 
opal_list_get_end(&orte_odls_process.children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - if (ORTE_SUCCESS != (rc = send_signal(child->pid, (int)signal))) { - ORTE_ERROR_LOG(rc); - } - } - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - return rc; - } - - /* we want it sent to some specified process, so find it */ - for (item = opal_list_get_first(&orte_odls_process.children); - item != opal_list_get_end(&orte_odls_process.children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - if (ORTE_EQUAL == orte_dss.compare(&(child->name), (void*)proc, ORTE_NAME)) { - /* unlock before signaling as this may generate a callback */ - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - if (ORTE_SUCCESS != (rc = send_signal(child->pid, (int)signal))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - } - - /* only way to get here is if we couldn't find the specified proc. - * report that as an error and return it - */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - return ORTE_ERR_NOT_FOUND; -} - -int orte_odls_process_deliver_message(orte_jobid_t job, orte_buffer_t *buffer, orte_rml_tag_t tag) -{ - int rc; - opal_list_item_t *item; - orte_odls_child_t *child; - - /* protect operations involving the global list of children */ - OPAL_THREAD_LOCK(&orte_odls_process.mutex); - - for (item = opal_list_get_first(&orte_odls_process.children); - item != opal_list_get_end(&orte_odls_process.children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - - /* do we have a child from the specified job. Because the - * job could be given as a WILDCARD value, we must use - * the dss.compare function to check for equality. 
- */ - if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { - continue; - } - opal_output(orte_odls_globals.output, "odls: sending message to tag %lu on child %s", - (unsigned long)tag, ORTE_NAME_PRINT(child->name)); - - /* if so, send the message */ - rc = orte_rml.send_buffer(child->name, buffer, tag, 0); - if (rc < 0) { - ORTE_ERROR_LOG(rc); - } - } - - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - return ORTE_SUCCESS; -} - -static int orte_odls_process_extract_proc_map_info(orte_process_name_t *daemon, - orte_process_name_t *proc, - orte_gpr_value_t *value) -{ - int rc; - orte_vpid_t *vptr; - -#if 0 - /*** NOTE: YOU WILL NEED TO REVISE THIS TO REFLECT HOW YOU STORED - THE DATA IN YOUR GET_ADD_PROCS_DATA ROUTINE. YOU MAY WISH - TO REVISE THAT ROUTINE, AND YOUR LAUNCH ROUTINE WHERE YOU PARSE - THAT DATA, TO REFLECT CHANGES IN THE DEFAULT COMPONENT AS SOME - EFFICIENCIES AND FEATURES HAVE BEEN ADDED - ****/ - - /* vpid of daemon that will host these procs is in first position */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[0]->value, ORTE_VPID))) { + + if (ORTE_SUCCESS != (rc = orte_odls_base_default_signal_local_procs(proc, signal, send_signal))) { ORTE_ERROR_LOG(rc); - return rc; } - daemon->vpid = *vptr; - - /* vpid of proc is in second position */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[1]->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - proc->vpid = *vptr; - - return ORTE_SUCCESS; -#endif - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int orte_odls_process_require_sync(orte_process_name_t *proc) -{ - orte_buffer_t buffer; - opal_list_item_t *item; - orte_odls_child_t *child; - int8_t dummy; - int rc; - bool found=false; - - - /* protect operations involving the global list of children */ - OPAL_THREAD_LOCK(&orte_odls_process.mutex); - - for (item = opal_list_get_first(&orte_odls_process.children); - item != 
opal_list_get_end(&orte_odls_process.children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - - /* find this child */ - if (ORTE_EQUAL == orte_dss.compare(proc, child->name, ORTE_NAME)) { - opal_output(orte_odls_globals.output, "odls: registering sync on child %s", - ORTE_NAME_PRINT(child->name)); - - child->sync_required = !child->sync_required; - found = true; - break; - } - } - - /* if it wasn't found on the list, then we need to add it - must have - * come from a singleton - */ - if (!found) { - child = OBJ_NEW(orte_odls_child_t); - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&child->name, proc, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; - } - opal_list_append(&orte_odls_process.children, &child->super); - /* we don't know any other info about the child, so just indicate it's - * alive and set the sync - */ - child->alive = true; - child->sync_required = !child->sync_required; - } - - /* ack the call */ - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - orte_dss.pack(&buffer, &dummy, 1, ORTE_INT8); /* put anything in */ - opal_output(orte_odls_globals.output, "odls: sending sync ack to child %s", - ORTE_NAME_PRINT(proc)); - if (0 > (rc = orte_rml.send_buffer(proc, &buffer, ORTE_RML_TAG_SYNC, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buffer); - return rc; - } - OBJ_DESTRUCT(&buffer); - - opal_condition_signal(&orte_odls_process.cond); - OPAL_THREAD_UNLOCK(&orte_odls_process.mutex); - return ORTE_SUCCESS; + return rc; } - -orte_odls_base_module_1_3_0_t orte_odls_process_module = { - orte_odls_process_get_add_procs_data, - orte_odls_process_launch_local_procs, - orte_odls_process_kill_local_procs, - orte_odls_process_signal_local_proc, - orte_odls_process_deliver_message, - orte_odls_process_extract_proc_map_info, - orte_odls_process_require_sync +orte_odls_base_module_t orte_odls_process_module = { + orte_odls_base_default_get_add_procs_data, + odls_process_launch_local_procs, + odls_process_kill_local_procs, + 
odls_process_signal_local_proc, + orte_odls_base_default_deliver_message, + orte_odls_base_default_require_sync }; diff --git a/orte/mca/oob/Makefile.am b/orte/mca/oob/Makefile.am index 36b0999836..8ecee4fb64 100644 --- a/orte/mca/oob/Makefile.am +++ b/orte/mca/oob/Makefile.am @@ -24,7 +24,7 @@ libmca_oob_la_SOURCES = nobase_orte_HEADERS = # local files -headers = oob.h oob_types.h +headers = oob.h libmca_oob_la_SOURCES += $(headers) # Conditionally install the header files diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index cdcda1e1c7..c94b26c334 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -24,6 +24,7 @@ #define _MCA_OOB_BASE_H_ #include "orte_config.h" +#include "orte/types.h" #ifdef HAVE_UNISTD_H #include @@ -38,15 +39,8 @@ #include "orte/mca/oob/oob.h" #include "opal/mca/mca.h" -#include "orte/dss/dss_types.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/gpr/gpr_types.h" -#include "orte/mca/oob/oob_types.h" -#include "orte/mca/rml/rml_types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS /* * global flag for use in timing tests @@ -105,8 +99,6 @@ extern char* mca_oob_base_exclude; ORTE_DECLSPEC extern opal_list_t mca_oob_base_components; ORTE_DECLSPEC extern opal_list_t mca_oob_base_modules; -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/oob/base/oob_base_close.c b/orte/mca/oob/base/oob_base_close.c index e36c9e625a..3f35f294ab 100644 --- a/orte/mca/oob/base/oob_base_close.c +++ b/orte/mca/oob/base/oob_base_close.c @@ -18,10 +18,10 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include -#include "orte/orte_constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "orte/mca/oob/oob.h" diff --git a/orte/mca/oob/base/oob_base_init.c b/orte/mca/oob/base/oob_base_init.c index 49545147ec..2edcfc0a98 100644 --- a/orte/mca/oob/base/oob_base_init.c +++ 
b/orte/mca/oob/base/oob_base_init.c @@ -18,19 +18,15 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include #include -#include "orte/runtime/runtime.h" #include "opal/util/output.h" -#include "orte/util/proc_info.h" -#include "opal/util/argv.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" + #include "orte/mca/oob/oob.h" #include "orte/mca/oob/base/base.h" diff --git a/orte/mca/oob/base/oob_base_open.c b/orte/mca/oob/base/oob_base_open.c index 0c082fc043..13ed3ad13f 100644 --- a/orte/mca/oob/base/oob_base_open.c +++ b/orte/mca/oob/base/oob_base_open.c @@ -19,7 +19,7 @@ #include "orte_config.h" -#include "orte/orte_constants.h" +#include "orte/constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" diff --git a/orte/mca/oob/oob.h b/orte/mca/oob/oob.h index fa70ca88a9..81742caafc 100644 --- a/orte/mca/oob/oob.h +++ b/orte/mca/oob/oob.h @@ -24,6 +24,7 @@ #define MCA_OOB_H_ #include "orte_config.h" +#include "orte/types.h" #ifdef HAVE_UNISTD_H #include @@ -32,10 +33,7 @@ #include "opal/types.h" #include "opal/mca/mca.h" -#include "orte/mca/ns/ns_types.h" - #include "orte/mca/rml/rml.h" -#include "orte/mca/oob/oob_types.h" #include "opal/mca/crs/crs.h" #include "opal/mca/crs/base/base.h" diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index e6ebbc3d26..ebfe597547 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -24,8 +24,7 @@ */ #include "orte_config.h" - -#include "orte/orte_types.h" +#include "orte/types.h" #ifdef HAVE_UNISTD_H #include @@ -50,12 +49,14 @@ #include "opal/util/if.h" #include "opal/util/net.h" #include "opal/class/opal_hash_table.h" + #include "orte/class/orte_proc_table.h" -#include "orte/mca/oob/tcp/oob_tcp.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" #include "orte/mca/rml/rml.h" +#include "orte/util/name_fns.h" 
+#include "orte/runtime/orte_globals.h" + +#include "orte/mca/oob/tcp/oob_tcp.h" #if defined(__WINDOWS__) static opal_mutex_t windows_callback; @@ -473,7 +474,7 @@ static void mca_oob_tcp_accept(int incoming_sd) /* log the accept */ if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { opal_output(0, "%s mca_oob_tcp_accept: %s:%d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), opal_net_get_hostname((struct sockaddr*) &addr), opal_net_get_port((struct sockaddr*) &addr)); } @@ -721,7 +722,7 @@ static void* mca_oob_tcp_listen_thread(opal_object_t *obj) if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { opal_output(0, "%s mca_oob_tcp_listen_thread: (%d, %d) %s:%d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), item->fd, opal_socket_errno, inet_ntoa(item->addr.sin_addr), item->addr.sin_port); @@ -792,7 +793,7 @@ static int mca_oob_tcp_listen_progress(void) /* log the accept */ if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { opal_output(0, "%s mca_oob_tcp_listen_progress: %s:%d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), inet_ntoa(item->addr.sin_addr), item->addr.sin_port); } @@ -892,7 +893,7 @@ static void mca_oob_tcp_recv_probe(int sd, mca_oob_tcp_hdr_t* hdr) hdr->msg_type = MCA_OOB_TCP_PROBE; hdr->msg_dst = hdr->msg_src; - hdr->msg_src = *orte_process_info.my_name; + hdr->msg_src = *ORTE_PROC_MY_NAME; MCA_OOB_TCP_HDR_HTON(hdr); while(cnt < sizeof(mca_oob_tcp_hdr_t)) { @@ -900,7 +901,7 @@ static void mca_oob_tcp_recv_probe(int sd, mca_oob_tcp_hdr_t* hdr) if(retval < 0) { if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) { opal_output(0, "%s-%s mca_oob_tcp_peer_recv_probe: send() failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(hdr->msg_src)), strerror(opal_socket_errno), opal_socket_errno); 
@@ -926,33 +927,28 @@ static void mca_oob_tcp_recv_connect(int sd, mca_oob_tcp_hdr_t* hdr) /* now set socket up to be non-blocking */ if((flags = fcntl(sd, F_GETFL, 0)) < 0) { opal_output(0, "%s mca_oob_tcp_recv_handler: fcntl(F_GETFL) failed: %s (%d)", - ORTE_NAME_PRINT(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); } else { flags |= O_NONBLOCK; if(fcntl(sd, F_SETFL, flags) < 0) { opal_output(0, "%s mca_oob_tcp_recv_handler: fcntl(F_SETFL) failed: %s (%d)", - ORTE_NAME_PRINT(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); } } - /* check for invalid name - if this is true - we allocate a name from the name server - * and return to the peer + /* check for invalid name - if this is true, then we have an error */ - cmpval = orte_ns.compare_fields(ORTE_NS_CMP_ALL, &hdr->msg_src, ORTE_NAME_INVALID); - if (cmpval == ORTE_EQUAL) { - if (ORTE_SUCCESS != orte_ns.create_jobid(&hdr->msg_src.jobid, NULL)) { - return; - } - if (ORTE_SUCCESS != orte_ns.reserve_range(hdr->msg_src.jobid, 1, &hdr->msg_src.vpid)) { - return; - } + cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &hdr->msg_src, ORTE_NAME_INVALID); + if (cmpval == OPAL_EQUAL) { + ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); + return; } /* lookup the corresponding process */ peer = mca_oob_tcp_peer_lookup(&hdr->msg_src); if(NULL == peer) { opal_output(0, "%s mca_oob_tcp_recv_handler: unable to locate peer", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); CLOSE_THE_SOCKET(sd); return; } @@ -961,7 +957,7 @@ static void mca_oob_tcp_recv_connect(int sd, mca_oob_tcp_hdr_t* hdr) if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) { opal_output(0, "%s-%s mca_oob_tcp_recv_handler: " "rejected connection from %s connection state %d", - 
ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), ORTE_NAME_PRINT(&(hdr->msg_src)), peer->peer_state); @@ -1002,14 +998,14 @@ static void mca_oob_tcp_recv_handler(int sd, short flags, void* user) if(rc >= 0) { if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) { opal_output(0, "%s mca_oob_tcp_recv_handler: peer closed connection", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } CLOSE_THE_SOCKET(sd); return; } if(opal_socket_errno != EINTR) { opal_output(0, "%s mca_oob_tcp_recv_handler: recv() failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); CLOSE_THE_SOCKET(sd); return; } @@ -1026,7 +1022,7 @@ static void mca_oob_tcp_recv_handler(int sd, short flags, void* user) break; default: opal_output(0, "%s mca_oob_tcp_recv_handler: invalid message type: %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), hdr.msg_type); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hdr.msg_type); CLOSE_THE_SOCKET(sd); break; } @@ -1160,11 +1156,11 @@ int mca_oob_tcp_init(void) /* random delay to stagger connections back to seed */ #if defined(__WINDOWS__) if(1 == mca_oob_tcp_component.connect_sleep) { - Sleep((orte_process_info.my_name->vpid % randval % 1000) * 100); + Sleep((ORTE_PROC_MY_NAME->vpid % randval % 1000) * 100); } #elif defined(HAVE_USLEEP) if(1 == mca_oob_tcp_component.connect_sleep) { - usleep((orte_process_info.my_name->vpid % randval % 1000) * 1000); + usleep((ORTE_PROC_MY_NAME->vpid % randval % 1000) * 1000); } #endif @@ -1173,7 +1169,7 @@ int mca_oob_tcp_init(void) /* create a listen socket */ if ((OOB_TCP_LISTEN_THREAD == mca_oob_tcp_component.tcp_listen_type) && - orte_process_info.seed) { + orte_process_info.hnp) { if (mca_oob_tcp_create_listen_thread() != ORTE_SUCCESS) { opal_output(0, "mca_oob_tcp_init: 
unable to create listen thread"); return ORTE_ERROR; @@ -1187,7 +1183,7 @@ int mca_oob_tcp_init(void) opal_progress_register(mca_oob_tcp_listen_progress); if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) { opal_output(0, "%s accepting connections via listen thread", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } } else { /* fix up the listen_type, since we might have been in thread, @@ -1216,7 +1212,7 @@ int mca_oob_tcp_init(void) #endif if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) { opal_output(0, "%s accepting connections via event library", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } } @@ -1304,7 +1300,7 @@ int mca_oob_tcp_fini(void) int mca_oob_tcp_process_name_compare(const orte_process_name_t* n1, const orte_process_name_t* n2) { - return orte_ns.compare_fields(ORTE_NS_CMP_ALL, n1, n2); + return orte_util_compare_name_fields(ORTE_NS_CMP_ALL, n1, n2); } @@ -1549,7 +1545,7 @@ mca_oob_tcp_get_new_name(orte_process_name_t* name) if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) { opal_output(0, "%s-%s mca_oob_tcp_get_new_name: starting\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name))); } @@ -1572,10 +1568,10 @@ mca_oob_tcp_get_new_name(orte_process_name_t* name) mca_oob_tcp_msg_wait(msg, &rc); if (ORTE_SUCCESS == rc) { - *name = *orte_process_info.my_name; + *name = *ORTE_PROC_MY_NAME; if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) { opal_output(0, "%s mca_oob_tcp_get_new_name: done\n", - ORTE_NAME_PRINT(orte_process_info.my_name)); + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } } diff --git a/orte/mca/oob/tcp/oob_tcp.h b/orte/mca/oob/tcp/oob_tcp.h index 70c7067c5e..40edcb727b 100644 --- a/orte/mca/oob/tcp/oob_tcp.h +++ b/orte/mca/oob/tcp/oob_tcp.h @@ -25,21 +25,23 @@ #ifndef _MCA_OOB_TCP_H_ #define _MCA_OOB_TCP_H_ -#include "orte/mca/oob/oob.h" -#include 
"orte/mca/oob/base/base.h" +#include "orte_config.h" +#include "orte/types.h" + #include "opal/mca/base/base.h" -#include "orte/mca/ns/ns_types.h" #include "opal/class/opal_free_list.h" #include "opal/class/opal_hash_table.h" #include "opal/runtime/opal_progress.h" #include "opal/runtime/opal_cr.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" +#include "opal/mca/timer/base/base.h" + +#include "orte/mca/oob/oob.h" +#include "orte/mca/oob/base/base.h" #include "orte/mca/oob/tcp/oob_tcp_peer.h" #include "orte/mca/oob/tcp/oob_tcp_msg.h" -#include "opal/mca/timer/base/base.h" - BEGIN_C_DECLS @@ -172,13 +174,6 @@ int mca_oob_tcp_parse_uri( struct sockaddr* inaddr ); -/** - * Callback from registry on change to subscribed segments - */ -void mca_oob_tcp_registry_callback( - orte_gpr_notify_data_t* data, - void* cbdata); - /** * Setup socket options */ @@ -283,8 +278,6 @@ typedef struct mca_oob_tcp_device_t mca_oob_tcp_device_t; OBJ_CLASS_DECLARATION(mca_oob_tcp_device_t); - - END_C_DECLS #endif /* MCA_OOB_TCP_H_ */ diff --git a/orte/mca/oob/tcp/oob_tcp_addr.c b/orte/mca/oob/tcp/oob_tcp_addr.c index 2f92d0b3f2..77b592d648 100644 --- a/orte/mca/oob/tcp/oob_tcp_addr.c +++ b/orte/mca/oob/tcp/oob_tcp_addr.c @@ -17,6 +17,9 @@ */ #include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" + #ifdef HAVE_SYS_TYPES_H #include #endif @@ -30,13 +33,11 @@ #include #endif #include -#include "orte/orte_constants.h" + #include "opal/util/if.h" #include "opal/util/net.h" -#include "orte/mca/ns/ns_types.h" #include "orte/util/proc_info.h" -#include "orte/dss/dss.h" #include "oob_tcp.h" #include "oob_tcp_addr.h" diff --git a/orte/mca/oob/tcp/oob_tcp_addr.h b/orte/mca/oob/tcp/oob_tcp_addr.h index cdd40341b0..e45f1b1692 100644 --- a/orte/mca/oob/tcp/oob_tcp_addr.h +++ b/orte/mca/oob/tcp/oob_tcp_addr.h @@ -24,15 +24,18 @@ #define _MCA_OOB_TCP_ADDR_H_ #include "orte_config.h" +#include "orte/types.h" + #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef 
HAVE_NETINET_IN_H #include #endif -#include "orte/dss/dss.h" + #include "opal/class/opal_object.h" -#include "orte/mca/ns/ns_types.h" + +#include "opal/dss/dss.h" BEGIN_C_DECLS diff --git a/orte/mca/oob/tcp/oob_tcp_hdr.h b/orte/mca/oob/tcp/oob_tcp_hdr.h index e21f445c95..cc521fed2a 100644 --- a/orte/mca/oob/tcp/oob_tcp_hdr.h +++ b/orte/mca/oob/tcp/oob_tcp_hdr.h @@ -23,7 +23,10 @@ #ifndef _MCA_OOB_TCP_HDR_H_ #define _MCA_OOB_TCP_HDR_H_ -#include "orte/mca/ns/ns_types.h" +#include "orte_config.h" +#include "orte/types.h" + +BEGIN_C_DECLS #define MCA_OOB_TCP_PROBE 1 #define MCA_OOB_TCP_CONNECT 2 @@ -66,5 +69,7 @@ typedef struct mca_oob_tcp_hdr_t mca_oob_tcp_hdr_t; (h)->msg_size = htonl((h)->msg_size); \ (h)->msg_tag = htonl((h)->msg_tag); +END_C_DECLS + #endif /* _MCA_OOB_TCP_MESSAGE_H_ */ diff --git a/orte/mca/oob/tcp/oob_tcp_msg.c b/orte/mca/oob/tcp/oob_tcp_msg.c index 3e36b4a935..391d0bba69 100644 --- a/orte/mca/oob/tcp/oob_tcp_msg.c +++ b/orte/mca/oob/tcp/oob_tcp_msg.c @@ -24,11 +24,16 @@ */ #include "orte_config.h" +#include "orte/constants.h" + #include "opal/opal_socket_errno.h" #include "orte/class/orte_proc_table.h" -#include "orte/orte_constants.h" -#include "orte/mca/ns/ns.h" +#include "orte/util/name_fns.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/routed/routed.h" +#include "orte/runtime/orte_globals.h" + #include "orte/mca/oob/tcp/oob_tcp.h" #include "orte/mca/oob/tcp/oob_tcp_msg.h" @@ -269,7 +274,7 @@ bool mca_oob_tcp_msg_send_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee return false; else { opal_output(0, "%s-%s mca_oob_tcp_msg_send_handler: writev failed: %s (%d)", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); @@ -317,7 +322,7 @@ bool mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee msg->msg_rwbuf = malloc(msg->msg_hdr.msg_size); if(NULL == msg->msg_rwbuf) { opal_output(0, 
"%s-%s mca_oob_tcp_msg_recv_handler: malloc(%d) failed\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), msg->msg_hdr.msg_size); mca_oob_tcp_peer_close(peer); @@ -333,7 +338,7 @@ bool mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee } if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) { opal_output(0, "%s-%s (origin: %s) mca_oob_tcp_msg_recv_handler: size %lu\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), ORTE_NAME_PRINT(&(msg->msg_hdr.msg_origin)), (unsigned long)(msg->msg_hdr.msg_size) ); @@ -375,7 +380,7 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee else if (opal_socket_errno == EAGAIN || opal_socket_errno == EWOULDBLOCK) return false; opal_output(0, "%s-%s mca_oob_tcp_msg_recv: readv failed: %s (%d)", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); @@ -387,7 +392,7 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee } else if (rc == 0) { if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) { opal_output(0, "%s-%s mca_oob_tcp_msg_recv: peer closed connection", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name))); } mca_oob_tcp_peer_close(peer); @@ -434,7 +439,7 @@ void mca_oob_tcp_msg_recv_complete(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* p break; default: opal_output(0, "%s mca_oob_tcp_msg_recv_complete: invalid message type: %d from peer %s\n", - ORTE_NAME_PRINT(orte_process_info.my_name), msg->msg_hdr.msg_type, + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg->msg_hdr.msg_type, ORTE_NAME_PRINT(&peer->peer_name)); MCA_OOB_TCP_MSG_RETURN(msg); break; @@ -452,7 +457,7 @@ static void 
mca_oob_tcp_msg_ident(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pe orte_process_name_t src = msg->msg_hdr.msg_src; OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock); - if (orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, &src) != ORTE_EQUAL) { + if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->peer_name, &src) != OPAL_EQUAL) { orte_hash_table_remove_proc(&mca_oob_tcp_component.tcp_peers, &peer->peer_name); peer->peer_name = src; orte_hash_table_set_proc(&mca_oob_tcp_component.tcp_peers, &peer->peer_name, peer); @@ -481,8 +486,32 @@ static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee { /* attempt to match unexpected message to a posted recv */ mca_oob_tcp_msg_t* post; + int rc; OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock); + if (ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid) != + ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { + /* this message came from a different job family, so we may + * not know how to route any reply back to the originator. 
Update + * our route so we can dynamically build the routing table + */ + /* if the origin and the src are the same, then we don't need to do + * this - update_route was already called when the connection was + * established in oob_tcp_peer + */ + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + &(msg->msg_hdr.msg_origin), + &(msg->msg_hdr.msg_src))) { + if (ORTE_SUCCESS != (rc = orte_routed.update_route(&(msg->msg_hdr.msg_origin), + &(msg->msg_hdr.msg_src)))) { + /* Nothing we can do about errors here as we definitely want + * the receive to complete, but at least bark loudly + */ + ORTE_ERROR_LOG(rc); + } + } + } + /* match msg against posted receives */ post = mca_oob_tcp_msg_match_post(&msg->msg_hdr.msg_origin, msg->msg_hdr.msg_tag); if(NULL != post) { @@ -594,7 +623,7 @@ mca_oob_tcp_msg_t* mca_oob_tcp_msg_match_recv(orte_process_name_t* name, int tag msg != (mca_oob_tcp_msg_t*) opal_list_get_end(&mca_oob_tcp_component.tcp_msg_recv); msg = (mca_oob_tcp_msg_t*) opal_list_get_next(msg)) { - if(ORTE_EQUAL == orte_dss.compare(name, &msg->msg_hdr.msg_origin, ORTE_NAME)) { + if(OPAL_EQUAL == opal_dss.compare(name, &msg->msg_hdr.msg_origin, ORTE_NAME)) { if (tag == msg->msg_hdr.msg_tag) { return msg; } @@ -619,7 +648,7 @@ mca_oob_tcp_msg_t* mca_oob_tcp_msg_match_post(orte_process_name_t* name, int tag msg != (mca_oob_tcp_msg_t*) opal_list_get_end(&mca_oob_tcp_component.tcp_msg_post); msg = (mca_oob_tcp_msg_t*) opal_list_get_next(msg)) { - if(ORTE_EQUAL == orte_dss.compare(name, &msg->msg_peer, ORTE_NAME)) { + if(OPAL_EQUAL == opal_dss.compare(name, &msg->msg_peer, ORTE_NAME)) { if (msg->msg_hdr.msg_tag == tag) { if((msg->msg_flags & ORTE_RML_PERSISTENT) == 0) { opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_post, &msg->super.super); diff --git a/orte/mca/oob/tcp/oob_tcp_msg.h b/orte/mca/oob/tcp/oob_tcp_msg.h index a1f07bb315..fc2919ba84 100644 --- a/orte/mca/oob/tcp/oob_tcp_msg.h +++ b/orte/mca/oob/tcp/oob_tcp_msg.h @@ -23,17 +23,20 @@ #ifndef 
_MCA_OOB_TCP_MESSAGE_H_ #define _MCA_OOB_TCP_MESSAGE_H_ +#include "orte_config.h" +#include "orte/types.h" + +#include + #include "opal/class/opal_list.h" +#include "opal/util/output.h" + #include "orte/mca/oob/oob.h" #include "oob_tcp_peer.h" #include "oob_tcp_hdr.h" -#include -#include "opal/util/output.h" -#include "orte/mca/ns/ns_types.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS + struct mca_oob_tcp_peer_t; #define MCA_OOB_TCP_IOV_MAX 16 @@ -211,8 +214,7 @@ static inline void mca_oob_tcp_msg_iov_return(mca_oob_tcp_msg_t* msg, struct iov free(iov); } -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS + #endif /* _MCA_OOB_TCP_MESSAGE_H_ */ diff --git a/orte/mca/oob/tcp/oob_tcp_peer.c b/orte/mca/oob/tcp/oob_tcp_peer.c index cc0165bd8e..59b3c84732 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.c +++ b/orte/mca/oob/tcp/oob_tcp_peer.c @@ -24,6 +24,7 @@ */ #include "orte_config.h" + #ifdef HAVE_UNISTD_H #include #endif @@ -49,14 +50,14 @@ #endif #include "opal/mca/backtrace/backtrace.h" -#include "orte/class/orte_proc_table.h" #include "opal/util/output.h" #include "opal/util/if.h" #include "opal/util/net.h" -#include "orte/util/univ_info.h" +#include "opal/util/error.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/ns/ns.h" +#include "orte/class/orte_proc_table.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/routed/routed.h" @@ -223,7 +224,7 @@ mca_oob_tcp_peer_t * mca_oob_tcp_peer_lookup(const orte_process_name_t* name) OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock); peer = (mca_oob_tcp_peer_t*)orte_hash_table_get_proc( &mca_oob_tcp_component.tcp_peers, name); - if (NULL != peer && 0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, name)) { + if (NULL != peer && 0 == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->peer_name, name)) { OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock); return 
peer; } @@ -302,7 +303,7 @@ mca_oob_tcp_peer_create_socket(mca_oob_tcp_peer_t* peer, if (peer->peer_sd < 0) { opal_output(0, "%s-%s mca_oob_tcp_peer_create_socket: socket() failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); @@ -320,7 +321,7 @@ mca_oob_tcp_peer_create_socket(mca_oob_tcp_peer_t* peer, if (peer->peer_sd >= 0) { if((flags = fcntl(peer->peer_sd, F_GETFL, 0)) < 0) { opal_output(0, "%s-%s mca_oob_tcp_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); @@ -328,7 +329,7 @@ mca_oob_tcp_peer_create_socket(mca_oob_tcp_peer_t* peer, flags |= O_NONBLOCK; if(fcntl(peer->peer_sd, F_SETFL, flags) < 0) opal_output(0, "%s-%s mca_oob_tcp_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); @@ -353,7 +354,7 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer) if(ORTE_SUCCESS != (rc = mca_oob_tcp_addr_get_next(peer->peer_addr, (struct sockaddr*) &inaddr))) { opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: " "mca_oob_tcp_addr_get_next failed with error=%d", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), rc); mca_oob_tcp_peer_close(peer); @@ -363,7 +364,7 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer) if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: " "connecting port %d to: %s:%d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), /* Bug, FIXME: output 
tcp6_listen_port for AF_INET6 */ ntohs(mca_oob_tcp_component.tcp_listen_port), @@ -407,7 +408,7 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer) ECONNREFUSED != opal_socket_errno)) { opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: " "connect to %s:%d failed: %s (%d)", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), opal_net_get_hostname((struct sockaddr*) &inaddr), opal_net_get_port((struct sockaddr*) &inaddr), @@ -426,7 +427,7 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer) opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: " "mca_oob_tcp_peer_send_connect_ack to %s:%d failed: %s (%d)", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), opal_net_get_hostname((struct sockaddr*) &inaddr), opal_net_get_port((struct sockaddr*) &inaddr), @@ -440,7 +441,7 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer) if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: " "Connection across all interfaces failed. 
Likely will retry", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name))); } mca_oob_tcp_peer_close(peer); @@ -486,7 +487,7 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd) /* check connect completion status */ if(getsockopt(sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) { opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: getsockopt() failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); @@ -502,7 +503,7 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd) if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: " "connection failed: %s (%d) - retrying\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(so_error), so_error); @@ -521,7 +522,7 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd) if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: " "sending ack, %d", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), so_error); } @@ -530,7 +531,7 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd) opal_event_add(&peer->peer_recv_event, 0); } else { opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: unable to send connect ack.", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name))); mca_oob_tcp_peer_close(peer); } @@ -568,22 +569,22 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t* peer) { if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { opal_output(0, "%s-%s 
mca_oob_tcp_peer_close(%p) sd %d state %d\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), (void*)peer, peer->peer_sd, peer->peer_state); } - /* if we lose the connection to the seed - abort */ - if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, ORTE_PROC_MY_HNP)) { + /* if we lose the connection to the HNP - abort */ + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->peer_name, ORTE_PROC_MY_HNP)) { /* If we are not already inside orte_finalize, then call abort */ - if (ORTE_UNIVERSE_STATE_FINALIZE > orte_universe_info.state) { + if (!orte_finalizing) { /* Should free the peer lock before we abort so we don't * get stuck in the orte_wait_kill when receiving messages in the * tcp OOB. */ OPAL_THREAD_UNLOCK(&peer->peer_lock); - orte_errmgr.error_detected(1, "OOB: Connection to HNP lost", NULL); + orte_errmgr.abort(1, "OOB: Connection to HNP lost"); } } @@ -597,7 +598,7 @@ void mca_oob_tcp_peer_shutdown(mca_oob_tcp_peer_t* peer) mca_oob_tcp_msg_t *msg; opal_output(0, "%s-%s oob-tcp: Communication retries exceeded. 
Can not communicate with peer", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name))); /* There are cases during the initial connection setup where @@ -646,11 +647,7 @@ static int mca_oob_tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer, int sd) */ mca_oob_tcp_hdr_t hdr; memset(&hdr,0,sizeof(hdr)); - if (NULL == orte_process_info.my_name) { /* my name isn't defined yet */ - hdr.msg_src = *ORTE_NAME_INVALID; - } else { - hdr.msg_src = *(orte_process_info.my_name); - } + hdr.msg_src = *(ORTE_PROC_MY_NAME); hdr.msg_dst = peer->peer_name; hdr.msg_type = MCA_OOB_TCP_CONNECT; MCA_OOB_TCP_HDR_HTON(&hdr); @@ -679,7 +676,7 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer, int sd) opal_output(0, "%s-%s mca_oob_tcp_peer_recv_connect_ack " "connect failed during receive. Restarting (%s).", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno)); } @@ -701,27 +698,16 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer, int sd) } /* compare the peers name to the expected value */ - if (0 != orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, &hdr.msg_src)) { + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->peer_name, &hdr.msg_src)) { opal_output(0, "%s-%s mca_oob_tcp_peer_recv_connect_ack: " "received unexpected process identifier %s\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), ORTE_NAME_PRINT(&(hdr.msg_src))); mca_oob_tcp_peer_close(peer); return ORTE_ERR_UNREACH; } - /* if we have an invalid name or do not have one assigned at all - - * use the name returned by the peer. 
This needs to be a LITERAL - * comparison - we do NOT want wildcard values to return EQUAL - */ - if(orte_process_info.my_name == NULL) { - orte_ns.create_process_name(&orte_process_info.my_name, - hdr.msg_dst.jobid, hdr.msg_dst.vpid); - } else if (orte_ns.compare_fields(ORTE_NS_CMP_ALL, orte_process_info.my_name, ORTE_NAME_INVALID) == ORTE_EQUAL) { - *orte_process_info.my_name = hdr.msg_dst; - } - /* connected */ mca_oob_tcp_peer_connected(peer, sd); if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { @@ -747,7 +733,7 @@ static int mca_oob_tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd, void if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) { opal_output(0, "%s-%s mca_oob_tcp_peer_recv_blocking: " "peer closed connection: peer state %d", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), peer->peer_state); } @@ -780,7 +766,7 @@ static int mca_oob_tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd, void opal_output(0, "%s-%s mca_oob_tcp_peer_recv_blocking: " "recv() failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(errno), errno); @@ -808,7 +794,7 @@ static int mca_oob_tcp_peer_send_blocking(mca_oob_tcp_peer_t* peer, int sd, void if(retval < 0) { if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) { opal_output(0, "%s-%s mca_oob_tcp_peer_send_blocking: send() failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); @@ -828,8 +814,8 @@ int mca_oob_tcp_peer_send_ident(mca_oob_tcp_peer_t* peer) mca_oob_tcp_hdr_t hdr; if(peer->peer_state != MCA_OOB_TCP_CONNECTED) return ORTE_SUCCESS; - hdr.msg_origin = *orte_process_info.my_name; - hdr.msg_src = *orte_process_info.my_name; + hdr.msg_src = 
*ORTE_PROC_MY_NAME; + hdr.msg_origin = *ORTE_PROC_MY_NAME; hdr.msg_dst = peer->peer_name; hdr.msg_type = MCA_OOB_TCP_IDENT; hdr.msg_size = 0; @@ -876,7 +862,7 @@ static void mca_oob_tcp_peer_recv_handler(int sd, short flags, void* user) MCA_OOB_TCP_MSG_ALLOC(msg, rc); if(NULL == msg) { opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to allocate recv message\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name))); return; } @@ -906,7 +892,7 @@ static void mca_oob_tcp_peer_recv_handler(int sd, short flags, void* user) default: { opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), peer->peer_state); mca_oob_tcp_peer_close(peer); @@ -954,7 +940,7 @@ static void mca_oob_tcp_peer_send_handler(int sd, short flags, void* user) } default: opal_output(0, "%s-%s mca_oob_tcp_peer_send_handler: invalid connection state (%d)", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), peer->peer_state); opal_event_del(&peer->peer_send_event); @@ -1020,7 +1006,7 @@ static void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg) #endif snprintf(buff, sizeof(buff), "%s-%s %s: %s - %s nodelay %d sndbuf %d rcvbuf %d flags %08x\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), msg, src, dst, nodelay, sndbuf, rcvbuf, flags); opal_output(0, buff); @@ -1041,11 +1027,11 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer, int sd) { int cmpval; OPAL_THREAD_LOCK(&peer->peer_lock); - cmpval = orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, orte_process_info.my_name); + cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->peer_name, ORTE_PROC_MY_NAME); if ((peer->peer_state == 
MCA_OOB_TCP_CLOSED) || (peer->peer_state == MCA_OOB_TCP_RESOLVE) || (peer->peer_state != MCA_OOB_TCP_CONNECTED && - cmpval == ORTE_VALUE1_GREATER)) { + cmpval == OPAL_VALUE1_GREATER)) { if(peer->peer_state != MCA_OOB_TCP_CLOSED) { mca_oob_tcp_peer_close(peer); @@ -1056,7 +1042,7 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer, int sd) if(mca_oob_tcp_peer_send_connect_ack(peer, sd) != ORTE_SUCCESS) { opal_output(0, "%s-%s mca_oob_tcp_peer_accept: " "mca_oob_tcp_peer_send_connect_ack failed\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name))); mca_oob_tcp_peer_close(peer); OPAL_THREAD_UNLOCK(&peer->peer_lock); diff --git a/orte/mca/oob/tcp/oob_tcp_peer.h b/orte/mca/oob/tcp/oob_tcp_peer.h index b2b1aa0597..05de91b405 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.h +++ b/orte/mca/oob/tcp/oob_tcp_peer.h @@ -24,6 +24,8 @@ #define _MCA_OOB_TCP_PEER_H_ #include "orte_config.h" +#include "orte/types.h" + #ifdef HAVE_NETINET_IN_H #include #endif @@ -31,10 +33,12 @@ #include "opal/class/opal_list.h" #include "opal/threads/mutex.h" -#include "orte/mca/ns/ns_types.h" +#include "opal/event/event.h" + #include "oob_tcp_msg.h" #include "oob_tcp_addr.h" -#include "opal/event/event.h" + +BEGIN_C_DECLS /** * the state of the connection @@ -94,10 +98,6 @@ typedef struct mca_oob_tcp_peer_t mca_oob_tcp_peer_t; &peer->super); \ } -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - /* * Class declaration. 
*/ @@ -157,9 +157,7 @@ int mca_oob_tcp_peer_send_ident(mca_oob_tcp_peer_t* peer); */ void mca_oob_tcp_peer_dequeue_msg(mca_oob_tcp_peer_t* peer, mca_oob_tcp_msg_t* msg); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif /* _MCA_OOB_TCP_PEER_H */ diff --git a/orte/mca/oob/tcp/oob_tcp_ping.c b/orte/mca/oob/tcp/oob_tcp_ping.c index c50d9bd57c..ec71f014ee 100644 --- a/orte/mca/oob/tcp/oob_tcp_ping.c +++ b/orte/mca/oob/tcp/oob_tcp_ping.c @@ -23,6 +23,8 @@ */ #include "orte_config.h" +#include "orte/types.h" + #ifdef HAVE_UNISTD_H #include #endif @@ -51,8 +53,9 @@ #endif #include "opal/event/event.h" -#include "orte/mca/ns/ns_types.h" #include "orte/util/proc_info.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/oob/tcp/oob_tcp.h" @@ -89,7 +92,7 @@ mca_oob_tcp_ping(const orte_process_name_t* name, if(ORTE_SUCCESS != (rc = mca_oob_tcp_parse_uri(uri, (struct sockaddr*) &inaddr))) { opal_output(0, "%s-%s mca_oob_tcp_ping: invalid uri: %s\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(name), uri); return rc; @@ -100,7 +103,7 @@ mca_oob_tcp_ping(const orte_process_name_t* name, if (sd < 0) { opal_output(0, "%s-%s mca_oob_tcp_ping: socket() failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); @@ -110,7 +113,7 @@ mca_oob_tcp_ping(const orte_process_name_t* name, /* setup the socket as non-blocking */ if((flags = fcntl(sd, F_GETFL, 0)) < 0) { opal_output(0, "%s-%s mca_oob_tcp_ping: fcntl(F_GETFL) failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); @@ -118,7 +121,7 @@ mca_oob_tcp_ping(const orte_process_name_t* name, flags |= O_NONBLOCK; if(fcntl(sd, F_SETFL, flags) < 0) { opal_output(0, "%s-%s 
mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); @@ -142,7 +145,7 @@ mca_oob_tcp_ping(const orte_process_name_t* name, /* connect failed? */ if(opal_socket_errno != EINPROGRESS && opal_socket_errno != EWOULDBLOCK) { opal_output(0, "%s-%s mca_oob_tcp_ping: connect failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); @@ -164,7 +167,7 @@ mca_oob_tcp_ping(const orte_process_name_t* name, flags &= ~O_NONBLOCK; if(fcntl(sd, F_SETFL, flags) < 0) { opal_output(0, "%s-%s mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); @@ -172,11 +175,8 @@ mca_oob_tcp_ping(const orte_process_name_t* name, /* send a probe message */ memset(&hdr, 0, sizeof(hdr)); - if(orte_process_info.my_name != NULL) { - hdr.msg_src = *orte_process_info.my_name; - } else { - hdr.msg_src = *ORTE_NAME_INVALID; - } + hdr.msg_src = *ORTE_PROC_MY_NAME; + hdr.msg_dst = *name; hdr.msg_type = MCA_OOB_TCP_PROBE; MCA_OOB_TCP_HDR_HTON(&hdr); diff --git a/orte/mca/oob/tcp/oob_tcp_recv.c b/orte/mca/oob/tcp/oob_tcp_recv.c index 288640a478..4e66a8b541 100644 --- a/orte/mca/oob/tcp/oob_tcp_recv.c +++ b/orte/mca/oob/tcp/oob_tcp_recv.c @@ -18,8 +18,10 @@ * $HEADER$ */ #include "orte_config.h" +#include "orte/types.h" -#include "orte/mca/ns/ns.h" +#include "orte/util/proc_info.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/oob/tcp/oob_tcp.h" @@ -120,11 +122,7 @@ int mca_oob_tcp_recv_nb( /* fill in the header */ msg->msg_hdr.msg_origin = *peer; - if (NULL == orte_process_info.my_name) { - msg->msg_hdr.msg_src = *ORTE_NAME_INVALID; - } else { - msg->msg_hdr.msg_src = 
*orte_process_info.my_name; - } + msg->msg_hdr.msg_src = *ORTE_PROC_MY_NAME; msg->msg_hdr.msg_dst = *peer; msg->msg_hdr.msg_size = size; msg->msg_hdr.msg_tag = tag; @@ -199,7 +197,7 @@ int mca_oob_tcp_recv_cancel( mca_oob_tcp_msg_t* msg = (mca_oob_tcp_msg_t*)item; next = opal_list_get_next(item); - if (ORTE_EQUAL == orte_dss.compare(name, &msg->msg_peer, ORTE_NAME)) { + if (OPAL_EQUAL == opal_dss.compare(name, &msg->msg_peer, ORTE_NAME)) { if (msg->msg_hdr.msg_tag == tag) { opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_post, &msg->super.super); MCA_OOB_TCP_MSG_RETURN(msg); diff --git a/orte/mca/oob/tcp/oob_tcp_send.c b/orte/mca/oob/tcp/oob_tcp_send.c index 1203407cc4..cfb6be158c 100644 --- a/orte/mca/oob/tcp/oob_tcp_send.c +++ b/orte/mca/oob/tcp/oob_tcp_send.c @@ -18,9 +18,11 @@ * $HEADER$ */ #include "orte_config.h" +#include "orte/types.h" -#include "orte/mca/ns/ns_types.h" #include "orte/util/proc_info.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/oob/tcp/oob_tcp.h" @@ -119,7 +121,7 @@ int mca_oob_tcp_send_nb( if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) { opal_output(0, "%s-%s mca_oob_tcp_send_nb: tag %d size %lu\n", - ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->peer_name)), tag, (unsigned long)size ); } @@ -129,11 +131,7 @@ int mca_oob_tcp_send_nb( msg->msg_hdr.msg_size = size; msg->msg_hdr.msg_tag = tag; msg->msg_hdr.msg_origin = *origin; - if (NULL == orte_process_info.my_name) { - msg->msg_hdr.msg_src = *ORTE_NAME_INVALID; - } else { - msg->msg_hdr.msg_src = *orte_process_info.my_name; - } + msg->msg_hdr.msg_src = *ORTE_PROC_MY_NAME; msg->msg_hdr.msg_dst = *target; /* create one additional iovect that will hold the size of the message */ @@ -154,7 +152,7 @@ int mca_oob_tcp_send_nb( msg->msg_complete = false; msg->msg_peer = peer->peer_name; - if (ORTE_EQUAL == mca_oob_tcp_process_name_compare(target, orte_process_info.my_name)) { 
/* local delivery */ + if (OPAL_EQUAL == mca_oob_tcp_process_name_compare(target, ORTE_PROC_MY_NAME)) { /* local delivery */ rc = mca_oob_tcp_send_self(peer,msg,iov,count); if (rc < 0 ) { return rc; diff --git a/orte/mca/pls/Makefile.am b/orte/mca/plm/Makefile.am similarity index 85% rename from orte/mca/pls/Makefile.am rename to orte/mca/plm/Makefile.am index 65e0b6c3c2..1190bc3409 100644 --- a/orte/mca/pls/Makefile.am +++ b/orte/mca/plm/Makefile.am @@ -17,21 +17,21 @@ # # main library setup -noinst_LTLIBRARIES = libmca_pls.la -libmca_pls_la_SOURCES = +noinst_LTLIBRARIES = libmca_plm.la +libmca_plm_la_SOURCES = # header setup nobase_orte_HEADERS = dist_pkgdata_DATA = # local files -headers = pls.h pls_types.h -libmca_pls_la_SOURCES += $(headers) +headers = plm.h plm_types.h +libmca_plm_la_SOURCES += $(headers) # Conditionally install the header files if WANT_INSTALL_HEADERS nobase_orte_HEADERS += $(headers) -ortedir = $(includedir)/openmpi/orte/mca/pls +ortedir = $(includedir)/openmpi/orte/mca/plm else ortedir = $(includedir) endif diff --git a/orte/mca/pls/alps/Makefile.am b/orte/mca/plm/alps/Makefile.am similarity index 71% rename from orte/mca/pls/alps/Makefile.am rename to orte/mca/plm/alps/Makefile.am index e675636481..25d4ad78cd 100644 --- a/orte/mca/pls/alps/Makefile.am +++ b/orte/mca/plm/alps/Makefile.am @@ -9,7 +9,6 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008 UT-Battelle, LLC # $COPYRIGHT$ # # Additional copyrights may follow @@ -18,32 +17,32 @@ # sources = \ - pls_alps.h \ - pls_alps_component.c \ - pls_alps_module.c + plm_alps.h \ + plm_alps_component.c \ + plm_alps_module.c -dist_pkgdata_DATA = help-pls-alps.txt +dist_pkgdata_DATA = help-plm-alps.txt # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
-if OMPI_BUILD_pls_alps_DSO +if OMPI_BUILD_plm_alps_DSO component_noinst = -component_install = mca_pls_alps.la +component_install = mca_plm_alps.la else -component_noinst = libmca_pls_alps.la +component_noinst = libmca_plm_alps.la component_install = endif mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_pls_alps_la_SOURCES = $(sources) -mca_pls_alps_la_LDFLAGS = -module -avoid-version -mca_pls_alps_la_LIBADD = \ +mca_plm_alps_la_SOURCES = $(sources) +mca_plm_alps_la_LDFLAGS = -module -avoid-version +mca_plm_alps_la_LIBADD = \ $(top_ompi_builddir)/orte/libopen-rte.la \ $(top_ompi_builddir)/opal/libopen-pal.la noinst_LTLIBRARIES = $(component_noinst) -libmca_pls_alps_la_SOURCES =$(sources) -libmca_pls_alps_la_LDFLAGS = -module -avoid-version +libmca_plm_alps_la_SOURCES =$(sources) +libmca_plm_alps_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sds/pipe/configure.m4 b/orte/mca/plm/alps/configure.m4 similarity index 83% rename from orte/mca/sds/pipe/configure.m4 rename to orte/mca/plm/alps/configure.m4 index d1914702b5..413bce53fe 100644 --- a/orte/mca/sds/pipe/configure.m4 +++ b/orte/mca/plm/alps/configure.m4 @@ -17,9 +17,8 @@ # $HEADER$ # -# MCA_sds_pipe_CONFIG([action-if-found], [action-if-not-found]) +# MCA_plm_alps_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_sds_pipe_CONFIG],[ - # check for pipe - AC_CHECK_FUNC([pipe], [$1], [$2]) +AC_DEFUN([MCA_plm_alps_CONFIG],[ + OMPI_CHECK_ALPS([plm_alps], [$1], [$2]) ])dnl diff --git a/orte/mca/pls/lsf/configure.params b/orte/mca/plm/alps/configure.params similarity index 100% rename from orte/mca/pls/lsf/configure.params rename to orte/mca/plm/alps/configure.params diff --git a/orte/mca/pls/alps/help-pls-alps.txt b/orte/mca/plm/alps/help-plm-alps.txt similarity index 97% rename from orte/mca/pls/alps/help-pls-alps.txt rename to orte/mca/plm/alps/help-plm-alps.txt index 96bfdd4807..bb170841e1 
100644 --- a/orte/mca/pls/alps/help-pls-alps.txt +++ b/orte/mca/plm/alps/help-plm-alps.txt @@ -10,7 +10,6 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008 UT-Battelle, LLC # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/orte/tools/orteprobe/orteprobe.h b/orte/mca/plm/alps/plm_alps.h similarity index 55% rename from orte/tools/orteprobe/orteprobe.h rename to orte/mca/plm/alps/plm_alps.h index 267775e210..eea32f6600 100644 --- a/orte/tools/orteprobe/orteprobe.h +++ b/orte/mca/plm/alps/plm_alps.h @@ -16,43 +16,35 @@ * $HEADER$ */ -#ifndef ORTEPROBE_H -#define ORTEPROBE_H +#ifndef ORTE_PLM_ALPS_EXPORT_H +#define ORTE_PLM_ALPS_EXPORT_H #include "orte_config.h" -#include - -#include "opal/class/opal_list.h" -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" - -#include "opal/util/cmd_line.h" #include "opal/mca/mca.h" +#include "orte/mca/plm/plm.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS + +struct orte_plm_alps_component_t { + orte_plm_base_component_t super; + int priority; + int debug; + bool timing; + char *orted; + char *custom_args; +}; +typedef struct orte_plm_alps_component_t orte_plm_alps_component_t; /* - * Globals + * Globally exported variable */ -typedef struct { - bool help; - bool verbose; - bool debug; - char* name_string; - char* requestor_string; - opal_mutex_t mutex; - opal_condition_t condition; - bool exit_condition; -} orteprobe_globals_t; +ORTE_MODULE_DECLSPEC extern orte_plm_alps_component_t + mca_plm_alps_component; +ORTE_DECLSPEC extern orte_plm_base_module_t + orte_plm_alps_module; -extern orteprobe_globals_t orteprobe_globals; +END_C_DECLS -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* ORTEPROBE_H */ +#endif /* ORTE_PLM_ALPS_EXPORT_H */ diff --git a/orte/mca/pls/alps/pls_alps_component.c 
b/orte/mca/plm/alps/plm_alps_component.c similarity index 53% rename from orte/mca/pls/alps/pls_alps_component.c rename to orte/mca/plm/alps/plm_alps_component.c index 410cd523a6..6017089197 100644 --- a/orte/mca/pls/alps/pls_alps_component.c +++ b/orte/mca/plm/alps/plm_alps_component.c @@ -9,7 +9,6 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,33 +23,33 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include "opal/util/output.h" #include "opal/mca/base/mca_base_param.h" -#include "orte/orte_constants.h" -#include "orte/util/proc_info.h" +#include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/base/base.h" -#include "orte/mca/pls/base/pls_private.h" -#include "pls_alps.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/plm/base/base.h" +#include "orte/mca/plm/base/plm_private.h" +#include "plm_alps.h" /* - * Public string showing the pls ompi_alps component version number + * Public string showing the plm ompi_alps component version number */ -const char *mca_pls_alps_component_version_string = - "Open MPI alps pls MCA component version " ORTE_VERSION; +const char *mca_plm_alps_component_version_string = + "Open MPI alps plm MCA component version " ORTE_VERSION; /* * Local functions */ -static int pls_alps_open(void); -static int pls_alps_close(void); -static orte_pls_base_module_t *pls_alps_init(int *priority); +static int plm_alps_open(void); +static int plm_alps_close(void); +static orte_plm_base_module_t *plm_alps_init(int *priority); /* @@ -58,17 +57,17 @@ static orte_pls_base_module_t *pls_alps_init(int *priority); * and pointers to our public functions in it */ -orte_pls_alps_component_t mca_pls_alps_component = { +orte_plm_alps_component_t mca_plm_alps_component = { { /* 
First, the mca_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a pls v1.3.0 component (which also + /* Indicate that we are a plm v1.0.0 component (which also implies a specific MCA version) */ - ORTE_PLS_BASE_VERSION_1_3_0, + ORTE_PLM_BASE_VERSION_1_0_0, /* Component name and version */ @@ -79,8 +78,8 @@ orte_pls_alps_component_t mca_pls_alps_component = { /* Component open and close functions */ - pls_alps_open, - pls_alps_close + plm_alps_open, + plm_alps_close }, /* Next the MCA v1.0.0 component meta data */ @@ -92,75 +91,60 @@ orte_pls_alps_component_t mca_pls_alps_component = { /* Initialization / querying functions */ - pls_alps_init + plm_alps_init } - /* Other orte_pls_alps_component_t items -- left uninitialized - here; will be initialized in pls_alps_open() */ + /* Other orte_plm_alps_component_t items -- left uninitialized + here; will be initialized in plm_alps_open() */ }; -static int pls_alps_open(void) +static int plm_alps_open(void) { - mca_base_component_t *comp = &mca_pls_alps_component.super.pls_version; - int tmp, value; + mca_base_component_t *comp = &mca_plm_alps_component.super.plm_version; - mca_base_param_reg_int(comp, "debug", "Enable debugging of alps pls", + mca_base_param_reg_int(comp, "debug", "Enable debugging of alps plm", false, false, 0, - &mca_pls_alps_component.debug); - if (mca_pls_alps_component.debug == 0) { - mca_base_param_reg_int_name("orte", "debug", - "Whether or not to enable debugging output for all ORTE components (0 or 1)", - false, false, false, &mca_pls_alps_component.debug); + &mca_plm_alps_component.debug); + if (mca_plm_alps_component.debug == 0) { + mca_plm_alps_component.debug = orte_debug_flag; } mca_base_param_reg_int(comp, "priority", "Default selection priority", false, false, 75, - &mca_pls_alps_component.priority); + &mca_plm_alps_component.priority); mca_base_param_reg_string(comp, "orted", "Command to use to start proxy orted", false, false, 
"orted", - &mca_pls_alps_component.orted); + &mca_plm_alps_component.orted); - tmp = mca_base_param_reg_int_name("orte", "timing", - "Request that critical timing loops be measured", - false, false, 0, &value); - if (value != 0) { - mca_pls_alps_component.timing = true; - } else { - mca_pls_alps_component.timing = false; - } + mca_plm_alps_component.timing = orte_timing; mca_base_param_reg_string(comp, "args", "Custom arguments to srun", false, false, NULL, - &mca_pls_alps_component.custom_args); + &mca_plm_alps_component.custom_args); return ORTE_SUCCESS; } -static orte_pls_base_module_t *pls_alps_init(int *priority) +static orte_plm_base_module_t *plm_alps_init(int *priority) { - /* if we are NOT an HNP, then don't select us */ - if (!orte_process_info.seed) { - return NULL; - } - - *priority = mca_pls_alps_component.priority; - return &orte_pls_alps_module; + *priority = mca_plm_alps_component.priority; + return &orte_plm_alps_module; } -static int pls_alps_close(void) +static int plm_alps_close(void) { - if (NULL != mca_pls_alps_component.orted) { - free(mca_pls_alps_component.orted); + if (NULL != mca_plm_alps_component.orted) { + free(mca_plm_alps_component.orted); } - if (NULL != mca_pls_alps_component.custom_args) { - free(mca_pls_alps_component.custom_args); + if (NULL != mca_plm_alps_component.custom_args) { + free(mca_plm_alps_component.custom_args); } return ORTE_SUCCESS; diff --git a/orte/mca/pls/alps/pls_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c similarity index 68% rename from orte/mca/pls/alps/pls_alps_module.c rename to orte/mca/plm/alps/plm_alps_module.c index ed2789c4f9..fc557dc013 100644 --- a/orte/mca/pls/alps/pls_alps_module.c +++ b/orte/mca/plm/alps/plm_alps_module.c @@ -12,7 +12,6 @@ * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights * reserved. 
- * Copyright (c) 2008 UT-Battelle, LLC * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,8 +26,8 @@ */ #include "orte_config.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" +#include "orte/constants.h" +#include "orte/types.h" #include #ifdef HAVE_UNISTD_H @@ -60,48 +59,45 @@ #include "opal/util/basename.h" #include "opal/mca/base/mca_base_param.h" -#include "orte/runtime/params.h" +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" #include "orte/runtime/runtime.h" #include "orte/runtime/orte_wakeup.h" #include "orte/runtime/orte_wait.h" -#include "orte/mca/ns/base/base.h" -#include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/smr/smr.h" #include "orte/mca/rmaps/rmaps.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/base/base.h" -#include "orte/mca/pls/base/pls_private.h" -#include "pls_alps.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/plm/base/base.h" +#include "orte/mca/plm/base/plm_private.h" +#include "plm_alps.h" /* * Local functions */ -static int pls_alps_launch_job(orte_jobid_t jobid); -static int pls_alps_terminate_job(orte_jobid_t jobid, struct timeval *timeout, opal_list_t *attrs); -static int pls_alps_terminate_orteds(struct timeval *timeout, opal_list_t *attrs); -static int pls_alps_terminate_proc(const orte_process_name_t *name); -static int pls_alps_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t *attrs); -static int pls_alps_signal_proc(const orte_process_name_t *name, int32_t signal); -static int pls_alps_finalize(void); +static int plm_alps_init(void); +static int plm_alps_launch_job(orte_job_t *jdata); +static int plm_alps_terminate_job(orte_jobid_t jobid); +static int plm_alps_terminate_orteds(void); +static int plm_alps_signal_job(orte_jobid_t jobid, int32_t signal); +static int plm_alps_finalize(void); -static int pls_alps_start_proc(int argc, char **argv, char **env, +static int plm_alps_start_proc(int argc, char **argv, char **env, 
char *prefix); /* * Global variable */ -orte_pls_base_module_1_3_0_t orte_pls_alps_module = { - pls_alps_launch_job, - pls_alps_terminate_job, - pls_alps_terminate_orteds, - pls_alps_terminate_proc, - pls_alps_signal_job, - pls_alps_signal_proc, - pls_alps_finalize +orte_plm_base_module_t orte_plm_alps_module = { + plm_alps_init, + orte_plm_base_set_hnp_name, + plm_alps_launch_job, + plm_alps_terminate_job, + plm_alps_terminate_orteds, + plm_alps_signal_job, + plm_alps_finalize }; /* @@ -112,15 +108,27 @@ static orte_jobid_t active_job = ORTE_JOBID_INVALID; static bool failed_launch; +/** +* Init the module + */ +static int plm_alps_init(void) +{ + int rc; + + if (ORTE_SUCCESS != (rc = orte_plm_base_comm_start())) { + ORTE_ERROR_LOG(rc); + } + return rc; +} + + /* When working in this function, ALWAYS jump to "cleanup" if * you encounter an error so that orterun will be woken up and * the job can cleanly terminate */ -static int pls_alps_launch_job(orte_jobid_t jobid) +static int plm_alps_launch_job(orte_job_t *jdata) { - orte_job_map_t *map = NULL; - opal_list_item_t *item; - size_t num_nodes; + orte_job_map_t *map; char *jobid_string = NULL; char *param; char **argv = NULL; @@ -132,46 +140,64 @@ static int pls_alps_launch_job(orte_jobid_t jobid) char *nodelist_flat; char **nodelist_argv; int nodelist_argc; - orte_process_name_t name; - char *name_string; + char *vpid_string; char **custom_strings; int num_args, i; char *cur_prefix; struct timeval joblaunchstart, launchstart, launchstop; - int proc_name_index = 0; - - if (mca_pls_alps_component.timing) { + int proc_vpid_index; + orte_app_context_t **apps; + orte_node_t **nodes; + orte_std_cntr_t nnode; + + if (mca_plm_alps_component.timing) { if (0 != gettimeofday(&joblaunchstart, NULL)) { - opal_output(0, "pls_alps: could not obtain job start time"); + opal_output(0, "plm_alps: could not obtain job start time"); } } - /* save the active jobid */ - active_job = jobid; - /* indicate the state of the launch */ 
failed_launch = true; - /* Query the map for this job. - * We need the entire mapping for a couple of reasons: - * - need the prefix to start with. - * - need to know if we are launching on a subset of the allocated nodes - * All other mapping responsibilities fall to orted in the fork PLS - */ - rc = orte_rmaps.get_job_map(&map, jobid); - if (ORTE_SUCCESS != rc) { + /* create a jobid for this job */ + if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(&jdata->jobid))) { ORTE_ERROR_LOG(rc); goto cleanup; } - num_nodes = map->num_new_daemons; - if (num_nodes == 0) { - /* no new daemons required - just launch apps */ + OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, + "%s plm:alps: launching job %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(jdata->jobid))); + + /* setup the job */ + if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* save the active jobid */ + active_job = jdata->jobid; + + /* Get the map for this job */ + if (NULL == (map = orte_rmaps.get_job_map(active_job))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rc = ORTE_ERR_NOT_FOUND; + goto cleanup; + } + apps = (orte_app_context_t**)jdata->apps->addr; + nodes = (orte_node_t**)map->nodes->addr; + + if (0 == map->num_new_daemons) { + /* have all the daemons we need - launch app */ + OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, + "%s plm:alps: no new daemons to launch", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto launch_apps; } - + /* need integer value for command line parameter */ - asprintf(&jobid_string, "%lu", (unsigned long) jobid); + orte_util_convert_jobid_to_string(&jobid_string, jdata->jobid); /* * start building argv array @@ -187,8 +213,8 @@ static int pls_alps_launch_job(orte_jobid_t jobid) opal_argv_append(&argc, &argv, "aprun"); /* Append user defined arguments to aprun */ - if ( NULL != mca_pls_alps_component.custom_args ) { - custom_strings = opal_argv_split(mca_pls_alps_component.custom_args, ' '); + if ( NULL 
!= mca_plm_alps_component.custom_args ) { + custom_strings = opal_argv_split(mca_plm_alps_component.custom_args, ' '); num_args = opal_argv_count(custom_strings); for (i = 0; i < num_args; ++i) { opal_argv_append(&argc, &argv, custom_strings[i]); @@ -197,7 +223,7 @@ static int pls_alps_launch_job(orte_jobid_t jobid) } /* number of processors needed */ - asprintf(&tmp, "-n %lu", (unsigned long) num_nodes); + asprintf(&tmp, "-n %lu", (unsigned long) map->num_new_daemons); opal_argv_append(&argc, &argv, tmp); free(tmp); opal_argv_append(&argc, &argv, "-N 1"); @@ -206,25 +232,21 @@ static int pls_alps_launch_job(orte_jobid_t jobid) nodelist_argv = NULL; nodelist_argc = 0; - for (item = opal_list_get_first(&map->nodes); - item != opal_list_get_end(&map->nodes); - item = opal_list_get_next(item)) { - orte_mapped_node_t* node = (orte_mapped_node_t*)item; - + for (nnode=0; nnode < map->num_nodes; nnode++) { /* if the daemon already exists on this node, then * don't include it */ - if (node->daemon_preexists) { + if (nodes[nnode]->daemon_launched) { continue; } /* otherwise, add it to the list of nodes upon which * we need to launch a daemon */ - opal_argv_append(&nodelist_argc, &nodelist_argv, node->nodename); + opal_argv_append(&nodelist_argc, &nodelist_argv, nodes[nnode]->name); } if (0 == opal_argv_count(nodelist_argv)) { - opal_show_help("help-pls-alps.txt", "no-hosts-in-list", true); + opal_show_help("help-plm-alps.txt", "no-hosts-in-list", true); rc = ORTE_ERR_FAILED_TO_START; goto cleanup; } @@ -240,40 +262,32 @@ static int pls_alps_launch_job(orte_jobid_t jobid) */ /* add the daemon command (as specified by user) */ - opal_argv_append(&argc, &argv, mca_pls_alps_component.orted); + opal_argv_append(&argc, &argv, mca_plm_alps_component.orted); - /* ensure we don't lose contact */ - orte_no_daemonize_flag = true; - /* Add basic orted command line options, including debug flags */ - orte_pls_base_orted_append_basic_args(&argc, &argv, - &proc_name_index, + 
orte_plm_base_orted_append_basic_args(&argc, &argv, + "alps", + &proc_vpid_index, NULL); - /* force orted to use the alps sds */ - opal_argv_append(&argc, &argv, "--ns-nds"); - opal_argv_append(&argc, &argv, "alps"); - /* tell the new daemons the base of the name list so they can compute * their own name on the other end */ - name.jobid = 0; - name.vpid = map->daemon_vpid_start; - rc = orte_ns.get_proc_name_string(&name_string, &name); + rc = orte_util_convert_vpid_to_string(&vpid_string, map->daemon_vpid_start); if (ORTE_SUCCESS != rc) { - opal_output(0, "pls_alps: unable to create process name"); + opal_output(0, "plm_alps: unable to create process name"); goto cleanup; } - free(argv[proc_name_index]); - argv[proc_name_index] = strdup(name_string); - free(name_string); + free(argv[proc_vpid_index]); + argv[proc_vpid_index] = strdup(vpid_string); + free(vpid_string); - if (mca_pls_alps_component.debug) { + if (mca_plm_alps_component.debug) { param = opal_argv_join(argv, ' '); if (NULL != param) { - opal_output(0, "pls:alps: final top-level argv:"); - opal_output(0, "pls:alps: %s", param); + opal_output(0, "plm:alps: final top-level argv:"); + opal_output(0, "plm:alps: %s", param); free(param); } } @@ -283,16 +297,16 @@ static int pls_alps_launch_job(orte_jobid_t jobid) different prefix's in the app context, complain (i.e., only allow one --prefix option for the entire alps run -- we don't support different --prefix'es for different nodes in - the ALPS pls) */ + the ALPS plm) */ cur_prefix = NULL; - for (i=0; i < map->num_apps; i++) { - char * app_prefix_dir = map->apps[i]->prefix_dir; + for (i=0; i < jdata->num_apps; i++) { + char * app_prefix_dir = apps[i]->prefix_dir; /* Check for already set cur_prefix_dir -- if different, complain */ if (NULL != app_prefix_dir) { if (NULL != cur_prefix && 0 != strcmp (cur_prefix, app_prefix_dir)) { - opal_show_help("help-pls-alps.txt", "multiple-prefixes", + opal_show_help("help-plm-alps.txt", "multiple-prefixes", true, 
cur_prefix, app_prefix_dir); return ORTE_ERR_FATAL; } @@ -301,8 +315,8 @@ static int pls_alps_launch_job(orte_jobid_t jobid) same anyway */ if (NULL == cur_prefix) { cur_prefix = strdup(app_prefix_dir); - if (mca_pls_alps_component.debug) { - opal_output (0, "pls:alps: Set prefix:%s", + if (mca_plm_alps_component.debug) { + opal_output (0, "plm:alps: Set prefix:%s", cur_prefix); } } @@ -312,23 +326,20 @@ static int pls_alps_launch_job(orte_jobid_t jobid) /* setup environment */ env = opal_argv_copy(environ); - /* purge it of any params not for orteds */ - orte_pls_base_purge_mca_params(&env); - /* add the nodelist */ var = mca_base_param_environ_variable("orte", "alps", "nodelist"); opal_setenv(var, nodelist_flat, true, &env); free(nodelist_flat); free(var); - if (mca_pls_alps_component.timing) { + if (mca_plm_alps_component.timing) { if (0 != gettimeofday(&launchstart, NULL)) { - opal_output(0, "pls_alps: could not obtain start time"); + opal_output(0, "plm_alps: could not obtain start time"); } } /* exec the daemon(s) */ - if (ORTE_SUCCESS != (rc = pls_alps_start_proc(argc, argv, env, cur_prefix))) { + if (ORTE_SUCCESS != (rc = plm_alps_start_proc(argc, argv, env, cur_prefix))) { ORTE_ERROR_LOG(rc); goto cleanup; } @@ -339,13 +350,13 @@ static int pls_alps_launch_job(orte_jobid_t jobid) */ /* wait for daemons to callback */ - if (ORTE_SUCCESS != (rc = orte_pls_base_daemon_callback(map->num_new_daemons))) { + if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) { ORTE_ERROR_LOG(rc); goto cleanup; } launch_apps: - if (ORTE_SUCCESS != (rc = orte_pls_base_launch_apps(map))) { + if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) { ORTE_ERROR_LOG(rc); goto cleanup; } @@ -353,30 +364,27 @@ launch_apps: /* declare the launch a success */ failed_launch = false; - if (mca_pls_alps_component.timing) { + if (mca_plm_alps_component.timing) { if (0 != gettimeofday(&launchstop, NULL)) { - opal_output(0, "pls_alps: could not obtain stop 
time"); + opal_output(0, "plm_alps: could not obtain stop time"); } else { - opal_output(0, "pls_alps: daemon block launch time is %ld usec", + opal_output(0, "plm_alps: daemon block launch time is %ld usec", (launchstop.tv_sec - launchstart.tv_sec)*1000000 + (launchstop.tv_usec - launchstart.tv_usec)); - opal_output(0, "pls_alps: total job launch time is %ld usec", + opal_output(0, "plm_alps: total job launch time is %ld usec", (launchstop.tv_sec - joblaunchstart.tv_sec)*1000000 + (launchstop.tv_usec - joblaunchstart.tv_usec)); } } if (ORTE_SUCCESS != rc) { - opal_output(0, "pls:alps: start_procs returned error %d", rc); + opal_output(0, "plm:alps: start_procs returned error %d", rc); goto cleanup; } /* JMS: short we stash the alps pid in the gpr somewhere for cleanup? */ cleanup: - if (NULL != map) { - OBJ_RELEASE(map); - } if (NULL != argv) { opal_argv_free(argv); } @@ -390,19 +398,19 @@ cleanup: /* check for failed launch - if so, force terminate */ if (failed_launch) { - orte_pls_base_daemon_failed(jobid, false, -1, 0, ORTE_JOB_STATE_FAILED_TO_START); + orte_plm_base_launch_failed(active_job, false, -1, 0, ORTE_JOB_STATE_FAILED_TO_START); } return rc; } -static int pls_alps_terminate_job(orte_jobid_t jobid, struct timeval *timeout, opal_list_t *attrs) +static int plm_alps_terminate_job(orte_jobid_t jobid) { int rc; /* order them to kill their local procs for this job */ - if (ORTE_SUCCESS != (rc = orte_pls_base_orted_kill_local_procs(jobid, timeout, attrs))) { + if (ORTE_SUCCESS != (rc = orte_plm_base_orted_kill_local_procs(jobid))) { ORTE_ERROR_LOG(rc); } @@ -413,7 +421,7 @@ static int pls_alps_terminate_job(orte_jobid_t jobid, struct timeval *timeout, o /** * Terminate the orteds for a given job */ -static int pls_alps_terminate_orteds(struct timeval *timeout, opal_list_t *attrs) +static int plm_alps_terminate_orteds(void) { int rc; @@ -425,7 +433,7 @@ static int pls_alps_terminate_orteds(struct timeval *timeout, opal_list_t *attrs 
orte_wait_cb_cancel(alps_pid); /* tell them to die! */ - if (ORTE_SUCCESS != (rc = orte_pls_base_orted_exit(timeout, attrs))) { + if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit())) { ORTE_ERROR_LOG(rc); } @@ -433,21 +441,10 @@ static int pls_alps_terminate_orteds(struct timeval *timeout, opal_list_t *attrs } -/* - * The way we've used ALPS, we can't kill individual processes -- - * we'll kill the entire job - */ -static int pls_alps_terminate_proc(const orte_process_name_t *name) -{ - opal_output(0, "pls:alps:terminate_proc: not supported"); - return ORTE_ERR_NOT_SUPPORTED; -} - - /** * Signal all the processes in the child alps by sending the signal directly to it */ -static int pls_alps_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t *attrs) +static int plm_alps_signal_job(orte_jobid_t jobid, int32_t signal) { if (0 != alps_pid) { kill(alps_pid, (int)signal); @@ -456,22 +453,12 @@ static int pls_alps_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t * } -/* - * Signal a specific process - */ -static int pls_alps_signal_proc(const orte_process_name_t *name, int32_t signal) -{ - opal_output(0, "pls:alps:signal_proc: not supported"); - return ORTE_ERR_NOT_SUPPORTED; -} - - -static int pls_alps_finalize(void) +static int plm_alps_finalize(void) { int rc; /* cleanup any pending recvs */ - if (ORTE_SUCCESS != (rc = orte_pls_base_comm_stop())) { + if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) { ORTE_ERROR_LOG(rc); } @@ -507,20 +494,20 @@ static void alps_wait_cb(pid_t pid, int status, void* cbdata){ /* report that the daemon has failed so we break out of the daemon * callback receive and exit */ - orte_pls_base_daemon_failed(active_job, true, pid, status, ORTE_JOB_STATE_FAILED_TO_START); + orte_plm_base_launch_failed(active_job, true, pid, status, ORTE_JOB_STATE_FAILED_TO_START); } else { /* an orted must have died unexpectedly after launch - report * that the daemon has failed so we exit */ - orte_pls_base_daemon_failed(active_job, 
false, pid, status, ORTE_JOB_STATE_ABORTED); + orte_plm_base_launch_failed(active_job, false, pid, status, ORTE_JOB_STATE_ABORTED); } } } -static int pls_alps_start_proc(int argc, char **argv, char **env, +static int plm_alps_start_proc(int argc, char **argv, char **env, char *prefix) { int fd; @@ -540,7 +527,7 @@ static int pls_alps_start_proc(int argc, char **argv, char **env, char *bin_base = NULL, *lib_base = NULL; /* Figure out the basenames for the libdir and bindir. There - is a lengthy comment about this in pls_rsh_module.c + is a lengthy comment about this in plm_rsh_module.c explaining all the rationale for how / why we're doing this. */ @@ -560,8 +547,8 @@ static int pls_alps_start_proc(int argc, char **argv, char **env, asprintf(&newenv, "%s/%s", prefix, bin_base); } opal_setenv("PATH", newenv, true, &env); - if (mca_pls_alps_component.debug) { - opal_output(0, "pls:alps: reset PATH: %s", newenv); + if (mca_plm_alps_component.debug) { + opal_output(0, "plm:alps: reset PATH: %s", newenv); } free(newenv); @@ -573,8 +560,8 @@ static int pls_alps_start_proc(int argc, char **argv, char **env, asprintf(&newenv, "%s/%s", prefix, lib_base); } opal_setenv("LD_LIBRARY_PATH", newenv, true, &env); - if (mca_pls_alps_component.debug) { - opal_output(0, "pls:alps: reset LD_LIBRARY_PATH: %s", + if (mca_plm_alps_component.debug) { + opal_output(0, "plm:alps: reset LD_LIBRARY_PATH: %s", newenv); } free(newenv); @@ -587,7 +574,7 @@ static int pls_alps_start_proc(int argc, char **argv, char **env, /* When not in debug mode and --debug-daemons was not passed, * tie stdout/stderr to dev null so we don't see messages from orted */ - if (0 == mca_pls_alps_component.debug && !orte_debug_daemons_flag) { + if (0 == mca_plm_alps_component.debug && !orte_debug_daemons_flag) { if (fd >= 0) { if (fd != 1) { dup2(fd,1); @@ -608,10 +595,9 @@ static int pls_alps_start_proc(int argc, char **argv, char **env, setpgid(0, 0); - char* param = opal_argv_join(argv, ';'); execve(exec_argv, 
argv, env); - opal_output(0, "pls:alps:start_proc: exec failed"); + opal_output(0, "plm:alps:start_proc: exec failed"); /* don't return - need to exit - returning would be bad - we're not in the calling process anymore */ exit(1); diff --git a/orte/mca/pls/base/Makefile.am b/orte/mca/plm/base/Makefile.am similarity index 68% rename from orte/mca/pls/base/Makefile.am rename to orte/mca/plm/base/Makefile.am index 6be8ff3ad4..832c6a1d41 100644 --- a/orte/mca/pls/base/Makefile.am +++ b/orte/mca/plm/base/Makefile.am @@ -16,17 +16,18 @@ # $HEADER$ # -dist_pkgdata_DATA += base/help-pls-base.txt +dist_pkgdata_DATA += base/help-plm-base.txt headers += \ - base/pls_private.h \ + base/plm_private.h \ base/base.h -libmca_pls_la_SOURCES += \ - base/pls_base_close.c \ - base/pls_base_general_support_fns.c \ - base/pls_base_open.c \ - base/pls_base_receive.c \ - base/pls_base_select.c \ - base/pls_base_reuse_daemon_launch.c \ - base/pls_base_orted_cmds.c +libmca_plm_la_SOURCES += \ + base/plm_base_close.c \ + base/plm_base_open.c \ + base/plm_base_receive.c \ + base/plm_base_select.c \ + base/plm_base_launch_support.c \ + base/plm_base_jobid.c \ + base/plm_base_proxy.c \ + base/plm_base_orted_cmds.c diff --git a/orte/mca/plm/base/base.h b/orte/mca/plm/base/base.h new file mode 100644 index 0000000000..6a4d630aca --- /dev/null +++ b/orte/mca/plm/base/base.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef MCA_PLM_BASE_H +#define MCA_PLM_BASE_H + +/* + * includes + */ +#include "orte_config.h" + +#include "opal/mca/mca.h" +#include "opal/class/opal_list.h" + +#include "orte/mca/plm/plm.h" + + +BEGIN_C_DECLS + +/** + * Struct to hold data for public access + */ +typedef struct orte_plm_base_t { + /** List of opened components */ + opal_list_t available_components; + /** indicate a component has been selected */ + bool selected; + /** selected component */ + orte_plm_base_component_t selected_component; +} orte_plm_base_t; + +/** + * Global instance of publicly-accessible PLM framework data + */ +ORTE_DECLSPEC extern orte_plm_base_t orte_plm_base; + +/* + * Global functions for MCA overall collective open and close + */ + +/** + * Open the plm framework + */ +ORTE_DECLSPEC int orte_plm_base_open(void); +/** + * Select a plm module + */ +ORTE_DECLSPEC int orte_plm_base_select(void); + +/** + * Close the plm framework + */ +ORTE_DECLSPEC int orte_plm_base_finalize(void); +ORTE_DECLSPEC int orte_plm_base_close(void); + +END_C_DECLS + +#endif diff --git a/orte/mca/pls/base/help-pls-base.txt b/orte/mca/plm/base/help-plm-base.txt similarity index 67% rename from orte/mca/pls/base/help-pls-base.txt rename to orte/mca/plm/base/help-plm-base.txt index 30b9870298..170fb282cb 100644 --- a/orte/mca/pls/base/help-pls-base.txt +++ b/orte/mca/plm/base/help-plm-base.txt @@ -39,3 +39,21 @@ This may be because the daemon was unable to find all the needed shared libraries on the remote node. You may set your LD_LIBRARY_PATH to have the location of the shared libraries on the remote nodes and this will automatically be forwarded to the remote nodes. +# +[incomplete-exit-cmd] +One or more daemons could not be ordered to exit. This can be caused by a +number of rather rare problems, but typically is caused by a daemon having +died due to the failure of a node or its communications. 
This could result +in an incomplete cleanup on the affected nodes. Please see below for a list +of nodes which may require additional cleanup. + +We are truly sorry for the inconvenience. +# +[incomplete-kill-procs-cmd] +One or more daemons could not be ordered to kill their local processes. +This can be caused by a number of rather rare problems, but typically +is caused by a daemon having died due to the failure of a node or its +communications. This could result in an incomplete cleanup on the affected +nodes. Additional information may be available below. + +We are truly sorry for the inconvenience. diff --git a/orte/mca/pls/base/pls_base_close.c b/orte/mca/plm/base/plm_base_close.c similarity index 64% rename from orte/mca/pls/base/pls_base_close.c rename to orte/mca/plm/base/plm_base_close.c index 6080c6ae9d..056e321d87 100644 --- a/orte/mca/pls/base/pls_base_close.c +++ b/orte/mca/plm/base/plm_base_close.c @@ -17,10 +17,10 @@ */ #include "orte_config.h" +#include "orte/constants.h" #include -#include "orte/orte_constants.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/util/output.h" @@ -28,19 +28,19 @@ #include "orte/util/proc_info.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/pls/base/pls_private.h" -#include "orte/mca/pls/base/base.h" +#include "orte/mca/plm/base/plm_private.h" +#include "orte/mca/plm/base/base.h" -int orte_pls_base_finalize(void) +int orte_plm_base_finalize(void) { int rc; /* Finalize the selected module */ - orte_pls.finalize(); + orte_plm.finalize(); - /* if we are an HNP, then stop our receive */ - if (orte_process_info.seed) { - if (ORTE_SUCCESS != (rc = orte_pls_base_comm_stop())) { + /* if we are the HNP, then stop our receive */ + if (orte_process_info.hnp) { + if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) { ORTE_ERROR_LOG(rc); return rc; } @@ -50,21 +50,21 @@ int orte_pls_base_finalize(void) } -int orte_pls_base_close(void) +int orte_plm_base_close(void) { /* finalize selected 
module */ - if (orte_pls_base.selected) { - orte_pls.finalize(); + if (orte_plm_base.selected) { + orte_plm.finalize(); } /* Close all open components */ - mca_base_components_close(orte_pls_base.pls_output, - &orte_pls_base.available_components, NULL); - OBJ_DESTRUCT(&orte_pls_base.available_components); + mca_base_components_close(orte_plm_globals.output, + &orte_plm_base.available_components, NULL); + OBJ_DESTRUCT(&orte_plm_base.available_components); /* clearout the orted cmd locks */ - OBJ_DESTRUCT(&orte_pls_base.orted_cmd_lock); - OBJ_DESTRUCT(&orte_pls_base.orted_cmd_cond); + OBJ_DESTRUCT(&orte_plm_globals.orted_cmd_lock); + OBJ_DESTRUCT(&orte_plm_globals.orted_cmd_cond); return ORTE_SUCCESS; } diff --git a/orte/mca/plm/base/plm_base_jobid.c b/orte/mca/plm/base/plm_base_jobid.c new file mode 100644 index 0000000000..1595ba90b2 --- /dev/null +++ b/orte/mca/plm/base/plm_base_jobid.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+#include
+
+#include "opal/util/output.h"
+#include "opal/hash_string.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/util/proc_info.h"
+#include "orte/util/sys_info.h"
+#include "orte/util/name_fns.h"
+#include "orte/runtime/orte_globals.h"
+
+#include "orte/mca/plm/base/plm_private.h"
+
+/*
+ * Derive this process's name from the local node name and pid.
+ *
+ * The node-name hash is XORed with the pid, and the two 16-bit halves
+ * of the result are XORed together to form a 16-bit job family.  That
+ * family becomes the upper 16 bits of our jobid and our vpid is 0.
+ * The same name is copied into the HNP name, and the next jobid to be
+ * handed out is set to our jobid + 1.  Always returns ORTE_SUCCESS.
+ */
+int orte_plm_base_set_hnp_name(void)
+{
+    uint32_t name_hash;
+    uint32_t pid_bias;
+    uint32_t mixed;
+    uint16_t family;
+
+    /* start from a hash of the nodename */
+    OPAL_HASH_STR(orte_system_info.nodename, name_hash);
+
+    pid_bias = (uint32_t)orte_process_info.pid;
+
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "plm:base:set_hnp_name: initial bias %ld nodename hash %lu",
+                         (long)pid_bias, (unsigned long)name_hash));
+
+    /* mix in the pid, then fold the upper half onto the lower half */
+    mixed = name_hash ^ pid_bias;
+    family = (uint16_t)(((mixed >> 16) & 0x0000ffff) ^ (mixed & 0x0000ffff));
+
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "plm:base:set_hnp_name: final jobfam %lu",
+                         (unsigned long)family));
+
+    /* the job family occupies the upper 16 bits of our jobid */
+    ORTE_PROC_MY_NAME->vpid = 0;
+    ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)family << 16);
+
+    /* copy our name into the HNP field */
+    ORTE_PROC_MY_HNP->jobid = ORTE_PROC_MY_NAME->jobid;
+    ORTE_PROC_MY_HNP->vpid = ORTE_PROC_MY_NAME->vpid;
+
+    /* jobids handed out later start right after ours */
+    orte_plm_globals.next_jobid = ORTE_PROC_MY_NAME->jobid + 1;
+
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Hand out the next jobid.
+ *
+ * On success *jobid receives orte_plm_globals.next_jobid, the counter
+ * is advanced, and ORTE_SUCCESS is returned.  Once the counter has
+ * passed ORTE_JOBID_MAX-1, *jobid is set to ORTE_JOBID_INVALID and
+ * ORTE_ERR_OUT_OF_RESOURCE is logged and returned.
+ */
+int orte_plm_base_create_jobid(orte_jobid_t *jobid)
+{
+    if (orte_plm_globals.next_jobid <= ORTE_JOBID_MAX-1) {
+        *jobid = orte_plm_globals.next_jobid++;
+        return ORTE_SUCCESS;
+    }
+
+    /* the jobid space is exhausted */
+    ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+    *jobid = ORTE_JOBID_INVALID;
+    return ORTE_ERR_OUT_OF_RESOURCE;
+}
diff --git a/orte/mca/plm/base/plm_base_launch_support.c
b/orte/mca/plm/base/plm_base_launch_support.c new file mode 100644 index 0000000000..0ab19c039b --- /dev/null +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -0,0 +1,875 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#ifdef HAVE_SYS_WAIT_H +#include +#endif + +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/util/output.h" +#include "opal/runtime/opal_progress.h" + +#include "opal/dss/dss.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/iof/iof.h" +#include "orte/mca/ras/ras.h" +#include "orte/mca/rmaps/rmaps.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/grpcomm/grpcomm.h" +#include "orte/mca/odls/odls.h" +#if OPAL_ENABLE_FT == 1 +#include "orte/mca/snapc/snapc.h" +#endif +#include "orte/runtime/orte_wakeup.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/runtime.h" +#include "orte/util/name_fns.h" + +#include "orte/tools/orterun/totalview.h" + +#include "orte/mca/plm/base/plm_private.h" + +static int orte_plm_base_report_launched(orte_jobid_t job); + +int orte_plm_base_setup_job(orte_job_t *jdata) +{ + int rc; + orte_std_cntr_t index; + orte_process_name_t name = {ORTE_JOBID_INVALID, 0}; + + OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, + "%s 
plm:base:setup_job for job %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_JOBID_PRINT(jdata->jobid)));
+
+    /* insert the job object into the global pool */
+    orte_pointer_array_add(&index, orte_job_data, jdata);
+
+    if (ORTE_SUCCESS != (rc = orte_ras.allocate(jdata))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(jdata))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /*
+     * setup I/O forwarding
+     */
+    name.jobid = jdata->jobid;
+    if (ORTE_SUCCESS != (rc = orte_iof.iof_pull(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDOUT, 1))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+    if (ORTE_SUCCESS != (rc = orte_iof.iof_pull(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDERR, 2))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+#if OPAL_ENABLE_FT == 1
+    /*
+     * Notify the Global SnapC component regarding new job
+     */
+    if( ORTE_SUCCESS != (rc = orte_snapc.setup_job(jdata->jobid) ) ) {
+        /* Silent Failure :/ JJH */
+        ORTE_ERROR_LOG(rc);
+    }
+#endif
+
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Tell the daemons to launch the procs of the given job and wait for
+ * the result.
+ *
+ * An ORTE_DAEMON_ADD_LOCAL_PROCS command followed by the data supplied
+ * by orte_odls.get_add_procs_data() is xcast to our own (daemon) job on
+ * ORTE_RML_TAG_DAEMON.  The call then blocks in
+ * orte_plm_base_report_launched() and finally pushes stdin to the job
+ * via the iof.
+ *
+ * Returns ORTE_SUCCESS, or the error code of the first step that
+ * failed.  The command buffer is released on every path.
+ */
+int orte_plm_base_launch_apps(orte_jobid_t job)
+{
+    orte_daemon_cmd_flag_t command;
+    opal_buffer_t *buffer;
+    int rc;
+    orte_process_name_t name = {ORTE_JOBID_INVALID, 0};
+
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:launch_apps for job %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_JOBID_PRINT(job)));
+
+    /* setup the buffer */
+    buffer = OBJ_NEW(opal_buffer_t);
+
+    /* pack the add_local_procs command */
+    command = ORTE_DAEMON_ADD_LOCAL_PROCS;
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(buffer);
+        return rc;
+    }
+
+    /* let the local launcher provide its required data */
+    if (ORTE_SUCCESS != (rc = orte_odls.get_add_procs_data(buffer, job))) {
+        ORTE_ERROR_LOG(rc);
+        /* release the buffer here too, as the other error paths do */
+        OBJ_RELEASE(buffer);
+        return rc;
+    }
+
+    /* send the command to the daemons */
+    if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid,
+                                                 buffer, ORTE_RML_TAG_DAEMON))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(buffer);
+        return rc;
+    }
+    OBJ_RELEASE(buffer);
+
+    /* wait for all the apps to report launched */
+    if (ORTE_SUCCESS != (rc = orte_plm_base_report_launched(job))) {
+        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                             "%s plm:base:launch failed for job %s on error %s",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                             ORTE_JOBID_PRINT(job), ORTE_ERROR_NAME(rc)));
+        return rc;
+    }
+
+    /* complete wiring up the iof */
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:launch wiring up iof",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+    name.jobid = job;
+    if (ORTE_SUCCESS != (rc = orte_iof.iof_push(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDIN, 0))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* init the debuggers */
+#if 0
+    orte_totalview_init_after_spawn(job);
+#endif
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:launch completed for job %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_JOBID_PRINT(job)));
+    return rc;
+}
+
+/*
+ * Record that a launch failed and wake up the main loop.
+ *
+ * job             - job whose state is set to 'state'
+ * callback_active - true if we failed while launching daemons; a fake
+ *                   {-1, signal-or--1, pid} report is then sent to
+ *                   ourselves on ORTE_RML_TAG_ORTED_CALLBACK so the
+ *                   daemon callback loop terminates, and
+ *                   orte_daemon_died is set
+ * pid, status     - pid and wait status of the failed process
+ * state           - job state to record
+ *
+ * orte_wakeup(status) is always called, even if the job is unknown.
+ */
+void orte_plm_base_launch_failed(orte_jobid_t job, bool callback_active, pid_t pid,
+                                 int status, orte_job_state_t state)
+{
+    int src[3] = {-1, -1, -1};
+    opal_buffer_t ack;
+    int rc;
+    orte_job_t *jdata;
+
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:launch_failed for job %s during %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_JOBID_PRINT(job),
+                         (callback_active) ?
"daemon launch" : "app launch"));
+
+    if (callback_active) {
+        /* if we failed while launching daemons, we need to fake a message to
+         * the daemon callback system so it can break out of its receive loop
+         */
+        src[2] = pid;
+        if(WIFSIGNALED(status)) {
+            src[1] = WTERMSIG(status);
+        }
+        OBJ_CONSTRUCT(&ack, opal_buffer_t);
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&ack, &src, 3, OPAL_INT))) {
+            ORTE_ERROR_LOG(rc);
+        }
+        rc = orte_rml.send_buffer(ORTE_PROC_MY_NAME, &ack, ORTE_RML_TAG_ORTED_CALLBACK, 0);
+        if (0 > rc) {
+            ORTE_ERROR_LOG(rc);
+        }
+        OBJ_DESTRUCT(&ack);
+        /* set the flag indicating that a daemon failed so we use the proper
+         * methods for attempting to shutdown the rest of the system
+         */
+        orte_daemon_died = true;
+
+    }
+
+    /* Set the job state as indicated so orterun's exit status
+       will be non-zero
+     */
+    /* find the job's data record */
+    if (NULL == (jdata = orte_get_job_data_object(job))) {
+        /* bad jobid */
+        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
+        goto WAKEUP;
+    }
+    /* set the state */
+    jdata->state = state;
+
+WAKEUP:
+    /* wakeup so orterun can exit */
+    orte_wakeup(status);
+}
+
+/* daemons callback when they start - need to listen for them */
+static int orted_num_callback;
+static bool orted_failed_launch;
+static orte_job_t *jdatorted;
+static orte_proc_t **pdatorted;
+
+/*
+ * RML callback for ORTE_RML_TAG_ORTED_CALLBACK: one daemon reporting
+ * the result of its launch.
+ *
+ * The buffer holds up to four ints.  src[0] == -1 marks a failed
+ * launch (src[1] is the terminating signal or -1, src[2] the pid).
+ * Otherwise the daemon's contact URI follows; it is registered with
+ * the RML, recorded in the daemon's proc object, a direct route is
+ * set, and the non-persistent receive is reposted.
+ *
+ * On success orted_num_callback is incremented.  On any failure
+ * orted_failed_launch is set and the errmgr is told the start was
+ * incomplete.
+ */
+static void orted_report_launch(int status, orte_process_name_t* sender,
+                                opal_buffer_t *buffer,
+                                orte_rml_tag_t tag, void *cbdata)
+{
+    char *rml_uri = NULL;
+    int src[4];
+    int rc, idx;
+    /* exit code reported when no aborted proc was recorded.
+     * NOTE(review): 1 is an assumed non-zero default - confirm the
+     * value the errmgr expects here.
+     */
+    orte_exit_code_t exit_code = 1;
+
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:orted_report_launch from daemon %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(sender)));
+
+    /* a daemon actually only sends us back one int value. However, if
+     * the daemon fails to launch, our local launcher may have additional
+     * info it wants to pass back to us, so we allow up to four int
+     * values to be returned. Fortunately, the DSS unpack routine
+     * knows how to handle this situation - it will only unpack the
+     * actual number of packed entries up to the number we specify here
+     */
+    idx = 4;
+    src[0]=src[1]=src[2]=src[3]=0;
+    rc = opal_dss.unpack(buffer, &src, &idx, OPAL_INT);
+    if(ORTE_SUCCESS != rc) {
+        ORTE_ERROR_LOG(rc);
+        orted_failed_launch = true;
+        goto CLEANUP;
+    }
+
+    if(-1 == src[0]) {
+        /* one of the daemons has failed to properly launch */
+        if(-1 == src[1]) { /* did not die on a signal */
+            opal_show_help("help-plm-base.txt", "daemon-died-no-signal", true, src[2]);
+        } else { /* died on a signal */
+            opal_show_help("help-plm-base.txt", "daemon-died-signal", true,
+                           src[2], src[1]);
+        }
+        orted_failed_launch = true;
+        goto CLEANUP;
+    }
+
+    /* okay, so the daemon says it started up okay - unpack its contact info */
+    idx = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &rml_uri, &idx, OPAL_STRING))) {
+        ORTE_ERROR_LOG(rc);
+        orted_failed_launch = true;
+        goto CLEANUP;
+    }
+
+    /* set the contact info into the hash table */
+    if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(rml_uri))) {
+        ORTE_ERROR_LOG(rc);
+        /* free exactly once - the string was previously freed twice here */
+        free(rml_uri);
+        orted_failed_launch = true;
+        goto CLEANUP;
+    }
+    /* lookup and record this daemon's contact info */
+    pdatorted[sender->vpid]->rml_uri = strdup(rml_uri);
+    free(rml_uri);
+
+    /* set the route to be direct */
+    if (ORTE_SUCCESS != (rc = orte_routed.update_route(sender, sender))) {
+        ORTE_ERROR_LOG(rc);
+        orted_failed_launch = true;
+        goto CLEANUP;
+    }
+
+    /* reissue the recv */
+    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ORTED_CALLBACK,
+                                 ORTE_RML_NON_PERSISTENT, orted_report_launch, NULL);
+    if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
+        ORTE_ERROR_LOG(rc);
+        orted_failed_launch = true;
+    }
+
+CLEANUP:
+
+    /* report the outcome - "failed" when the failure flag is set */
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:orted_report_launch %s for daemon %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         orted_failed_launch ? "failed" : "completed",
+                         ORTE_NAME_PRINT(sender)));
+
+    if (orted_failed_launch) {
+        /* nothing in this callback records an aborted proc for the
+         * daemon job, so only use its exit code when one is present
+         */
+        if (NULL != jdatorted && NULL != jdatorted->aborted_proc) {
+            exit_code = jdatorted->aborted_proc->exit_code;
+        }
+        orte_errmgr.incomplete_start(ORTE_PROC_MY_NAME->jobid, exit_code);
+    } else {
+        orted_num_callback++;
+    }
+
+}
+
+int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons)
+{
+    int rc;
+    opal_buffer_t *buf;
+    orte_rml_cmd_flag_t command;
+
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:daemon_callback",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    orted_num_callback = 0;
+    orted_failed_launch = false;
+    /* get the orted job data object */
+    if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        return ORTE_ERR_NOT_FOUND;
+    }
+    pdatorted = (orte_proc_t**)(jdatorted->procs->addr);
+
+    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ORTED_CALLBACK,
+                                 ORTE_RML_NON_PERSISTENT, orted_report_launch, NULL);
+    if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    while (!orted_failed_launch &&
+           orted_num_callback < num_daemons) {
+        opal_progress();
+    }
+
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:daemon_callback completed",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* all done launching - update the num_procs in my local structure */
+    orte_process_info.num_procs = jdatorted->num_procs;
+
+    /* update everyone's contact info so all daemons
+     * can talk to each other
+     */
+    buf = OBJ_NEW(opal_buffer_t);
+    /* pack the update-RML command */
+    command = ORTE_RML_UPDATE_CMD;
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &command, 1, ORTE_RML_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(buf);
+        return rc;
+    }
+    if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(buf);
+        return rc;
+    }
+    /* send it */
+    if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, buf, ORTE_RML_TAG_RML_INFO_UPDATE))) { +
ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(buf);
+        return rc;
+    }
+    /* done with the buffer */
+    OBJ_RELEASE(buf);
+
+    return ORTE_SUCCESS;
+}
+
+/* the daemons actually report back that their procs have launched. Each
+ * daemon will only send one message that contains the launch result
+ * for their local procs.
+ */
+static bool app_launch_failed;
+
+/*
+ * RML callback for ORTE_RML_TAG_APP_LAUNCH_CALLBACK: one daemon
+ * reporting the launch result of its local procs.
+ *
+ * The buffer holds the jobid followed by (vpid, pid, state, exit code)
+ * records, ended by ORTE_VPID_INVALID or by the end of the buffer.
+ * Each record is stored in the matching proc object and counted in
+ * jdata->num_launched.  A proc in ORTE_PROC_STATE_FAILED_TO_START is
+ * recorded as the job's aborted proc (first one only) and stops
+ * processing.  The non-persistent receive is reposted unless a
+ * failure occurred.
+ *
+ * On any failure app_launch_failed is set and the errmgr is told the
+ * start was incomplete.
+ */
+static void app_report_launch(int status, orte_process_name_t* sender,
+                              opal_buffer_t *buffer,
+                              orte_rml_tag_t tag, void *cbdata)
+{
+    orte_std_cntr_t cnt;
+    /* stays invalid if the jobid cannot be unpacked */
+    orte_jobid_t jobid = ORTE_JOBID_INVALID;
+    orte_vpid_t vpid;
+    orte_proc_state_t state;
+    orte_exit_code_t exit_code;
+    /* exit code reported when no aborted proc was recorded.
+     * NOTE(review): 1 is an assumed non-zero default - confirm the
+     * value the errmgr expects here.
+     */
+    orte_exit_code_t fail_code = 1;
+    pid_t pid;
+    /* stays NULL until the job lookup succeeds; CLEANUP checks it */
+    orte_job_t *jdata = NULL;
+    orte_proc_t **procs;
+    int rc;
+
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:app_report_launch from daemon %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(sender)));
+
+    /* unpack the jobid being reported */
+    cnt = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &cnt, ORTE_JOBID))) {
+        ORTE_ERROR_LOG(rc);
+        app_launch_failed = true;
+        goto CLEANUP;
+    }
+    /* get the job data object */
+    if (NULL == (jdata = orte_get_job_data_object(jobid))) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        app_launch_failed = true;
+        goto CLEANUP;
+    }
+    procs = (orte_proc_t**)(jdata->procs->addr);
+
+    /* the daemon will report the vpid, state, and pid of each
+     * process it launches - we need the pid in particular so
+     * that any debuggers can attach to the process
+     */
+    cnt = 1;
+    while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &vpid, &cnt, ORTE_VPID))) {
+        if (ORTE_VPID_INVALID == vpid) {
+            /* flag indicating we are done */
+            break;
+        }
+        /* unpack the pid */
+        cnt = 1;
+        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &pid, &cnt, OPAL_PID))) {
+            ORTE_ERROR_LOG(rc);
+            app_launch_failed = true;
+            goto CLEANUP;
+        }
+        /* unpack the state */
+        cnt = 1;
+        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &state, &cnt, ORTE_PROC_STATE))) {
+            ORTE_ERROR_LOG(rc);
+            app_launch_failed = true;
+            goto CLEANUP;
+        }
+        /* unpack the exit code */
+        cnt = 1;
+        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &exit_code, &cnt, ORTE_EXIT_CODE))) {
+            ORTE_ERROR_LOG(rc);
+            app_launch_failed = true;
+            goto CLEANUP;
+        }
+
+        OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                             "%s plm:base:app_report_launched for proc %s from daemon %s: pid %lu state %0x exit %d",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                             ORTE_NAME_PRINT(&(procs[vpid]->name)),
+                             ORTE_NAME_PRINT(sender), (unsigned long)pid,
+                             (int)state, (int)exit_code));
+
+        /* lookup the proc and update values */
+        procs[vpid]->pid = pid;
+        procs[vpid]->state = state;
+        procs[vpid]->exit_code = exit_code;
+        if (ORTE_PROC_STATE_FAILED_TO_START == state) {
+            OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                                 "%s plm:base:app_report_launched daemon %s reports proc %s failed to start",
+                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                 ORTE_NAME_PRINT(sender),
+                                 ORTE_NAME_PRINT(&(procs[vpid]->name))));
+            if (NULL == jdata->aborted_proc) {
+                jdata->aborted_proc = procs[vpid];  /* only store this once */
+                jdata->state = ORTE_JOB_STATE_FAILED_TO_START;  /* update the job state */
+            }
+            app_launch_failed = true;
+            goto CLEANUP;
+        }
+
+        /* record that a proc reported */
+        jdata->num_launched++;
+    }
+    if (ORTE_SUCCESS != rc) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+    OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
+                         "%s plm:base:app_report_launch reissuing non-blocking recv",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* reissue the non-blocking receive */
+    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_APP_LAUNCH_CALLBACK,
+                                 ORTE_RML_NON_PERSISTENT, app_report_launch, NULL);
+    if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
+        ORTE_ERROR_LOG(rc);
+        app_launch_failed = true;
+    }
+
+CLEANUP:
+    if (app_launch_failed) {
+        /* jdata is NULL when the jobid could not be unpacked or the
+         * job is unknown, and aborted_proc is only set on the
+         * FAILED_TO_START path - dereference neither blindly
+         */
+        if (NULL != jdata) {
+            jobid = jdata->jobid;
+            if (NULL != jdata->aborted_proc) {
+                fail_code = jdata->aborted_proc->exit_code;
+            }
+        }
+        orte_errmgr.incomplete_start(jobid, fail_code);
+    }
+
+}
+
+static int orte_plm_base_report_launched(orte_jobid_t job)
+{
+    int rc;
+    orte_job_t *jdata;
+
+    OPAL_OUTPUT_VERBOSE((5,
orte_plm_globals.output, + "%s plm:base:report_launched for job %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(job))); + + /* get the job data object */ + if (NULL == (jdata = orte_get_job_data_object(job))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + + /* we should get a callback from every daemon that is involved in + * the launch. Fortunately, the mapper keeps track of this number + * for us since num_nodes = num_participating_daemons + */ + app_launch_failed = false; + rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_APP_LAUNCH_CALLBACK, + ORTE_RML_NON_PERSISTENT, app_report_launch, NULL); + if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) { + ORTE_ERROR_LOG(rc); + return rc; + } + + while (!app_launch_failed && + jdata->num_launched < jdata->num_procs) { + opal_progress(); + } + + OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, + "%s plm:base:report_launched all apps reported", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* declare the job to be launched, but check to ensure + * the procs haven't already reported in to avoid setting the + * job back to an earlier state + */ + if (jdata->state < ORTE_JOB_STATE_LAUNCHED) { + jdata->state = ORTE_JOB_STATE_LAUNCHED; + } + + return ORTE_SUCCESS; +} + +int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, + char *ess, + int *proc_vpid_index, + int *node_name_index) +{ + char *param = NULL; + int loc_id; + char * amca_param_path = NULL; + char * amca_param_prefix = NULL; + char * tmp_force = NULL; + int i, cnt; + orte_job_t *jdata; + char *rml_uri; + + /* check for debug flags */ + if (orte_debug_flag) { + opal_argv_append(argc, argv, "--debug"); + } + if (orte_debug_daemons_flag) { + opal_argv_append(argc, argv, "--debug-daemons"); + } + if (orte_debug_daemons_file_flag) { + opal_argv_append(argc, argv, "--debug-daemons-file"); + } + if (orted_spin_flag) { + opal_argv_append(argc, argv, "--spin"); + } + + /* tell the orted what SDS 
component to use */ + opal_argv_append(argc, argv, "-mca"); + opal_argv_append(argc, argv, "ess"); + opal_argv_append(argc, argv, ess); + + /* pass the daemon jobid */ + opal_argv_append(argc, argv, "-mca"); + opal_argv_append(argc, argv, "orte_ess_jobid"); + orte_util_convert_jobid_to_string(¶m, ORTE_PROC_MY_NAME->jobid); + opal_argv_append(argc, argv, param); + free(param); + + /* setup to pass the vpid */ + if (NULL != proc_vpid_index) { + opal_argv_append(argc, argv, "-mca"); + opal_argv_append(argc, argv, "orte_ess_vpid"); + *proc_vpid_index = *argc; + opal_argv_append(argc, argv, "