1
1

Merge the ORTE devel branch into the main trunk. Details of what this means will be circulated separately.

Remains to be tested to ensure everything came over cleanly, so please continue to withhold commits a little longer

This commit was SVN r17632.
Этот коммит содержится в:
Ralph Castain 2008-02-28 01:57:57 +00:00
родитель 76e6334a57
Коммит d70e2e8c2b
1110 изменённых файлов: 46156 добавлений и 107680 удалений

Просмотреть файл

@ -67,9 +67,7 @@ m4_include(config/ompi_check_icc.m4)
m4_include(config/ompi_check_gm.m4)
m4_include(config/ompi_check_mx.m4)
m4_include(config/ompi_check_alps.m4)
m4_include(config/ompi_check_bproc.m4)
m4_include(config/ompi_check_lsf.m4)
m4_include(config/ompi_check_xcpu.m4)
m4_include(config/ompi_check_openib.m4)
m4_include(config/ompi_check_portals.m4)
m4_include(config/ompi_check_psm.m4)

Просмотреть файл

@ -1,67 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# OMPI_CHECK_BPROC(prefix, [action-if-new-bproc], [action-if-old-bproc],
# [action-if-not-found])
# --------------------------------------------------------
# Probe for the BProc library and classify the installed flavor.
#
# "new" bproc is LANL versions >= 3.2.0
# "old" bproc is all Scyld versions and LANL versions < 3.2.0
#
# Registers the --with-bproc option. --with-bproc=no runs
# action-if-not-found without probing. Otherwise the macro looks for
# the sys/bproc.h header and for bproc_numnodes in -lbproc (adding
# <dir>/include and <dir>/lib to the search flags when a directory was
# given), then uses the presence of sys/bproc_common.h to tell the new
# flavor from the old one. CPPFLAGS, LDFLAGS and LIBS are restored to
# their saved values after probing.
#
# On success the needed -I/-L flags and -lbproc are appended to
# prefix_CPPFLAGS, prefix_LDFLAGS and prefix_LIBS, and either
# action-if-new-bproc or action-if-old-bproc is run. On failure,
# configure aborts if --with-bproc was given with any value other than
# "no"; otherwise action-if-not-found is run.
AC_DEFUN([OMPI_CHECK_BPROC],[
AC_ARG_WITH([bproc],
[AC_HELP_STRING([--with-bproc],
[Directory where the BProc software is installed])])
AS_IF([test ! -z "$with_bproc" -a "$with_bproc" = "no"],[$4], [
ompi_check_bproc_save_CPPFLAGS="$CPPFLAGS"
ompi_check_bproc_save_LDFLAGS="$LDFLAGS"
ompi_check_bproc_save_LIBS="$LIBS"
AS_IF([test ! -z "$with_bproc" -a "$with_bproc" != "yes"],
[CPPFLAGS="$CPPFLAGS -I$with_bproc/include"
LDFLAGS="$LDFLAGS -L$with_bproc/lib"])
AC_CHECK_HEADERS([sys/bproc.h],
[AC_CHECK_LIB([bproc],
[bproc_numnodes],
[ompi_check_bproc_happy="yes"],
[ompi_check_bproc_happy="no"])],
[ompi_check_bproc_happy="no"])
# Check for Scyld bproc or an old version of LANL Bproc (pre 3.2.0)
AS_IF([test "$ompi_check_bproc_happy" = "yes"],
[AC_CHECK_HEADERS([sys/bproc_common.h],[ompi_check_bproc_happy="new"],
[ompi_check_bproc_happy="old"],
[#include <stdint.h>
#include <sys/socket.h>])])
CPPFLAGS="$ompi_check_bproc_save_CPPFLAGS"
LDFLAGS="$ompi_check_bproc_save_LDFLAGS"
LIBS="$ompi_check_bproc_save_LIBS"
AS_IF([test "$ompi_check_bproc_happy" != "no"],
[AS_IF([test ! -z "$with_bproc" -a "$with_bproc" != "yes"],
[$1_CPPFLAGS="$$1_CPPFLAGS -I$with_bproc/include"
$1_LDFLAGS="$$1_LDFLAGS -L$with_bproc/lib"])
$1_LIBS="$$1_LIBS -lbproc"
AS_IF([test "$ompi_check_bproc_happy" = "new"], [$2], [$3])],
[AS_IF([test ! -z "$with_bproc"],
[AC_MSG_ERROR([BProc support request but not found. Perhaps
you need to specify the location of the BProc libraries.])])
$4])
])
])

Просмотреть файл

@ -1,63 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2006 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# OMPI_CHECK_XCPU(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Probe for the XCPU library.
#
# Registers the --with-xcpu option. --with-xcpu=no runs
# action-if-not-found without probing. Otherwise the macro looks for
# the libxcpu.h header and for xp_command_create in -lxcpu (linked
# together with -lstrutil -lspclient -lspfs -lelf), adding
# <dir>/include and <dir>/lib to the search flags when a directory was
# given. CPPFLAGS, LDFLAGS and LIBS are restored to their saved values
# after probing.
#
# On success the needed -I/-L flags and the libraries are appended to
# prefix_CPPFLAGS, prefix_LDFLAGS and prefix_LIBS, and then
# action-if-found is run. The action is deliberately emitted on its
# own shell line: placed on the same line as the prefix_LIBS
# assignment, an action that expands to a command would turn that
# assignment into a one-command environment setting and the library
# list would be lost.
#
# On failure, configure aborts if --with-xcpu was given with any value
# other than "no"; otherwise action-if-not-found is run.
AC_DEFUN([OMPI_CHECK_XCPU],[
    AC_ARG_WITH([xcpu],
                [AC_HELP_STRING([--with-xcpu],
                                [=yes will Build XCPU launcher component (default: no)])])

    AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" = "no"],[$3], [
        ompi_check_xcpu_save_CPPFLAGS="$CPPFLAGS"
        ompi_check_xcpu_save_LDFLAGS="$LDFLAGS"
        ompi_check_xcpu_save_LIBS="$LIBS"

        AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" != "yes"],
              [CPPFLAGS="$CPPFLAGS -I$with_xcpu/include"
               LDFLAGS="$LDFLAGS -L$with_xcpu/lib"])

        AC_CHECK_HEADERS([libxcpu.h],
                         [AC_CHECK_LIB([xcpu],
                                       [xp_command_create],
                                       [ompi_check_xcpu_happy="yes"],
                                       [ompi_check_xcpu_happy="no"],
                                       [-lstrutil -lspclient -lspfs -lelf])],
                         [ompi_check_xcpu_happy="no"],
                         [#include <stdio.h>
#include <spfs.h>
#include <spclient.h>
#include <strutil.h>])

        CPPFLAGS="$ompi_check_xcpu_save_CPPFLAGS"
        LDFLAGS="$ompi_check_xcpu_save_LDFLAGS"
        LIBS="$ompi_check_xcpu_save_LIBS"

        AS_IF([test "$ompi_check_xcpu_happy" != "no"],
              [AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" != "yes"],
                     [$1_CPPFLAGS="$$1_CPPFLAGS -I$with_xcpu/include"
                      $1_LDFLAGS="$$1_LDFLAGS -L$with_xcpu/lib"])
               $1_LIBS="$$1_LIBS -lxcpu -lstrutil -lspclient -lspfs -lelf"
               $2],
              [AS_IF([test ! -z "$with_xcpu"],
                     [AC_MSG_ERROR([Xcpu support request but not found. Perhaps
you need to specify the location of the Xcpu libraries.])])
               $3])
    ])
])

Просмотреть файл

@ -546,6 +546,25 @@ AC_DEFINE_UNQUOTED([ORTE_ENABLE_JUMBO_APPS], [$orte_want_jumbo_apps],
[Enable support for applications in excess of 32K processes and/or 32K jobs, or running on clusters in excess of 32k nodes])
#
# Minimal RTE support
#
# The option is registered under the name "rte-support" so that it
# matches both the advertised --disable-rte-support flag and the
# enable_rte_support shell variable tested below. The resulting
# ORTE_DISABLE_FULL_SUPPORT define / conditional is 1 / true only when
# the user explicitly disabled RTE support.
#
AC_MSG_CHECKING([if want full RTE support])
AC_ARG_ENABLE([rte-support],
    [AC_HELP_STRING([--disable-rte-support],
                    [Disable RTE support for systems that do not require it (default: full RTE support enabled)])])
if test "$enable_rte_support" = "no"; then
    AC_MSG_RESULT([no])
    orte_disable_full_support=1
else
    AC_MSG_RESULT([yes])
    orte_disable_full_support=0
fi
AC_DEFINE_UNQUOTED([ORTE_DISABLE_FULL_SUPPORT], [$orte_disable_full_support],
                   [Whether full RTE support is disabled (1) or enabled (0)])
AM_CONDITIONAL(ORTE_DISABLE_FULL_SUPPORT, test "$enable_rte_support" = "no")
#
# Cross-compile data
#

Просмотреть файл

@ -50,7 +50,7 @@ AC_DEFUN([OMPI_MCA],[
AC_ARG_ENABLE([mca-no-build],
[AC_HELP_STRING([--enable-mca-no-build=LIST],
[Comma-separated list of <type>-<component> pairs
that will not be built. Example: "--enable-mca-no-build=maffinity-libnuma,btl-portals" will disable building both the "libnuma" maffinity and "portals" btl components.])])
that will not be built. Example: "--enable-mca-no-build=maffinity,btl-portals" will disable building all maffinity components and the "portals" btl components.])])
AC_ARG_ENABLE(mca-dso,
AC_HELP_STRING([--enable-mca-dso=LIST],
[Comma-separated list of types and/or
@ -88,9 +88,13 @@ AC_DEFUN([OMPI_MCA],[
for item in $enable_mca_no_build; do
type="`echo $item | cut -s -f1 -d-`"
comp="`echo $item | cut -s -f2- -d-`"
if test -z $type -o -z $comp ; then
AC_MSG_ERROR([*** The enable-no-build flag requires a
*** list of type-component pairs. Invalid input detected.])
if test -z $type ; then
type=$item
fi
if test -z $comp ; then
str="`echo DISABLE_${type}=1 | sed s/-/_/g`"
eval $str
msg="$item $msg"
else
str="`echo DISABLE_${type}_${comp}=1 | sed s/-/_/g`"
eval $str
@ -359,9 +363,13 @@ AC_DEFUN([MCA_CONFIGURE_FRAMEWORK],[
# abort with a reasonable message.
m4_ifdef([mca_$2_no_config_component_list], [],
[m4_fatal([Could not find mca_$2_no_config_component_list - rerun autogen.sh without -l])])
# make sure priority stuff set right
m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST],
[m4_ifval(mca_$2_no_config_component_list,
[m4_fatal([Framework $2 using STOP_AT_FIRST but at least one component has no configure.m4])])])
m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST_PRIORITY],
[m4_ifval(mca_$2_no_config_component_list,
[m4_fatal([Framework $2 using STOP_AT_FIRST but at least one component has no configure.m4])])])
m4_foreach(mca_component, [mca_$2_no_config_component_list],
[m4_ifval(mca_component,
[MCA_CONFIGURE_NO_CONFIG_COMPONENT($1, $2, mca_component,
@ -404,7 +412,7 @@ AC_DEFUN([MCA_CONFIGURE_FRAMEWORK],[
# It would be really hard to run these for "find first that
# works", so we don't :)
m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST], [],
[m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST], [],
[m4_if(OMPI_EVAL_ARG([MCA_]mca_framework[_CONFIGURE_MODE]), [STOP_AT_FIRST_PRIORITY], [],
[AS_IF([test "$3" != "0"],
[MCA_CONFIGURE_ALL_CONFIG_COMPONENTS($1, $2, [all_components],
[static_components], [dso_components],
@ -876,6 +884,11 @@ AC_DEFUN([MCA_COMPONENT_BUILD_CHECK],[
fi
# if we were explicitly disabled, don't build :)
str="DISABLED_COMPONENT_CHECK=\$DISABLE_${framework}"
eval $str
if test "$DISABLED_COMPONENT_CHECK" = "1" ; then
want_component=0
fi
str="DISABLED_COMPONENT_CHECK=\$DISABLE_${framework}_$component"
eval $str
if test "$DISABLED_COMPONENT_CHECK" = "1" ; then

Просмотреть файл

@ -1263,11 +1263,7 @@ AC_CONFIG_FILES([
orte/include/Makefile
orte/etc/Makefile
orte/tools/orteboot/Makefile
orte/tools/orted/Makefile
orte/tools/ortehalt/Makefile
orte/tools/ortekill/Makefile
orte/tools/orteprobe/Makefile
orte/tools/orterun/Makefile
orte/tools/wrappers/Makefile
orte/tools/wrappers/ortecc-wrapper-data.txt
@ -1304,6 +1300,7 @@ AC_CONFIG_FILES([
ompi/tools/wrappers/mpif77-wrapper-data.txt
ompi/tools/wrappers/mpif90-wrapper-data.txt
ompi/tools/ortetools/Makefile
ompi/tools/ompi-server/Makefile
test/Makefile
test/event/Makefile

Просмотреть файл

@ -6,7 +6,8 @@ enable_pretty_print_stacktrace=no
enable_dlopen=no
with_portals_config=redstorm
with_memory_manager=none
enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,gpr-proxy,gpr-replica,iof-svc,ns-proxy,oob-tcp,pls-rsh,ras-dash_host,ras-hostfile,ras-localhost,rds-hostfile,rds-resfile,rmaps-round_robin,rmgr-proxy,rmgr-urm,rml-oob,sds-env,sds-seed,sds-singleton,btl-sm,btl-self,btl-tcp,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,filem-rsh,grpcomm-basic
enable_mca_no_build=carto-file,maffinity,paffinity,timer,errmgr,iof,odls,oob,plm,ras,rmaps,rml,routed,filem,btl-sm,btl-self,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,libnbc,vt
with_rte_support=no
enable_heterogeneous=no
enable_pty_support=no
enable_mem_debug=no

Просмотреть файл

@ -1,3 +1,7 @@
enable_mem_debug=no
enable_mem_profile=no
enable_debug=no
enable_debug_symbols=no
enable_io_romio=yes
enable_static=yes
enable_shared=no
@ -6,12 +10,9 @@ enable_pretty_print_stacktrace=no
enable_dlopen=no
with_portals_config=cnl_modex
with_memory_manager=none
enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,pls-rsh,pml-dr,filem-rsh,grpcomm-cnos,pls-cnos,rmgr-cnos,rml-cnos,routed-cnos,sds-portals_utcp,sds-cnos
enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,ess-cnos,pml-dr,filem-rsh,grpcomm-cnos,rmgr-cnos,rml-cnos
enable_heterogeneous=no
enable_pty_support=no
enable_mem_debug=no
enable_mem_profile=no
enable_debug_symbols=no
enable_binaries=yes
ompi_cv_f77_sizeof_LOGICAL=${ompi_cv_f77_sizeof_LOGICAL=4}

20
contrib/platform/lanl/roadrunner/debug Обычный файл
Просмотреть файл

@ -0,0 +1,20 @@
with_threads=no
enable_dlopen=no
enable_pty_support=no
with_tm=/opt/PBS
with_wrapper_cflags=-I/opt/panfs/include
LDFLAGS=-L/opt/PBS/lib64
with_openib=/opt/ofed
with_io_romio_flags=--with-file-system=ufs+nfs+panfs
with_memory_manager=no
enable_mem_debug=yes
enable_mem_profile=no
enable_debug_symbols=yes
enable_binaries=yes
with_devel_headers=yes
enable_heterogeneous=yes
enable_picky=yes
enable_debug=yes
enable_shared=yes
enable_static=no
with_slurm=no

Просмотреть файл

@ -0,0 +1,64 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the default system-wide MCA parameters defaults file.
# Specifically, the MCA parameter "mca_param_files" defaults to a
# value of
# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf"
# (this file is the latter of the two). So if the default value of
# mca_param_files is not changed, this file is used to set system-wide
# MCA parameters. This file can therefore be used to set system-wide
# default MCA parameters for all users. Of course, users can override
# these values if they want, but this file is an excellent location
# for setting system-specific MCA parameters for those users who don't
# know / care enough to investigate the proper values for them.
# Note that this file is only applicable where it is visible (in a
# filesystem sense). Specifically, MPI processes each read this file
# during their startup to determine what default values for MCA
# parameters should be used. mpirun does not bundle up the values in
# this file from the node where it was run and send them to all nodes;
# the default value decisions are effectively distributed. Hence,
# these values are only applicable on nodes that "see" this file. If
# $sysconf is a directory on a local disk, it is likely that changes
# to this file will need to be propagated to other nodes. If $sysconf
# is a directory that is shared via a networked filesystem, changes to
# this file will be visible to all nodes that share this $sysconf.
# The format is straightforward: one per line, mca_param_name =
# rvalue. Quoting is ignored (so if you use quotes or escape
# characters, they'll be included as part of the value). For example:
# Disable run-time MPI parameter checking
# mpi_param_check = 0
# Note that the value "~/" will be expanded to the current user's home
# directory. For example:
# Change component loading path
# component_path = /usr/local/lib/openmpi:~/my_openmpi_components
# See "ompi_info --param all all" for a full listing of Open MPI MCA
# parameters available and their default values.
# Site settings. Each parameter is listed once; an identical second
# "oob_tcp_if_include = ib0" line was redundant and has been dropped.
# NOTE(review): the meaning of each parameter is not shown in this file;
# confirm with "ompi_info --param all all" before changing a value.
oob_tcp_if_include = ib0
mpi_preconnect_oob = 1
btl_sm_free_list_max = 768
oob_tcp_connect_timeout = 600

18
contrib/platform/lanl/roadrunner/optimized Обычный файл
Просмотреть файл

@ -0,0 +1,18 @@
with_threads=no
enable_dlopen=no
enable_pty_support=no
with_tm=/opt/PBS
LDFLAGS=-L/opt/PBS/lib64
with_openib=/opt/ofed
with_memory_manager=yes
enable_mem_debug=no
enable_mem_profile=no
enable_debug_symbols=no
enable_binaries=yes
with_devel_headers=no
enable_heterogeneous=yes
enable_debug=no
enable_shared=yes
with_wrapper_cflags=-I/opt/panfs/include
with_io_romio_flags=--with-file-system=ufs+nfs+panfs
with_slurm=no

Просмотреть файл

@ -10,13 +10,9 @@ enable_dlopen=no
with_portals_config=utcp
with_memory_manager=no
enable_heterogeneous=no
enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,gpr-proxy,gpr-replica,rml-oob,btl-sm,mpool-sm,btl-self,ns-proxy,rds-resfile,rds-hostfile,sds-env,sds-pipe,sds-seed,sds-singleton,coll-hierarch,coll-sm,pml-dr,btl-tcp,oob-tcp,ras-dash_host,ras-hostfile,ras-localhost,rmaps-round_robin,rmgr-urm,rmgr-proxy,pls-fork,pls-rsh,common-sm,iof-svc
with_slurm=no
with_bproc=no
with_mvapi=no
enable_mca_no_build=maffinity,paffinity,timer,errmgr,iof,odls,oob,plm,ras,rmaps,rml,routed,btl-sm,mpool-sm,btl-self,coll-hierarch,coll-sm,pml-dr,btl-tcp,common-sm
with_rte_support=no
with_openib=no
with_gm=no
with_mx=no
with_rml_cnos=utcp
with_rmgr_cnos=utcp
enable_binaries=no

Просмотреть файл

@ -5,7 +5,7 @@ with_threads=no
enable_pretty_print_stacktrace=no
enable_dlopen=no
with_memory_manager=none
enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,allocator-basic,rcache-vma,pls-gridengine,pls-slurm,ras-slurm,ras-gridengine,btl-sm,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,pml-cm,mpool-rdma,osc-rdma,sds-slurm,backtrace-darwin,memory-darwin,memory-malloc_hook,memory_ptmalloc2,paffinity-solaris,paffinity-windows,timer-aix,timer-altix,timer-darwin,timer-solaris,timer-windows
enable_mca_no_build=maffinity,paffinity,timer,allocator-basic,rcache-vma,plm-gridengine,plm-slurm,ras-slurm,ras-gridengine,btl-sm,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,pml-cm,mpool-rdma,osc-rdma,ess-slurm,backtrace-darwin,memory-darwin,memory-malloc_hook,memory_ptmalloc2
enable_heterogeneous=no
enable_pty_support=no
enable_mem_debug=no

Просмотреть файл

@ -6,7 +6,8 @@ enable_pretty_print_stacktrace=no
enable_dlopen=no
with_portals_config=redstorm
with_memory_manager=none
enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,timer-linux,gpr-proxy,gpr-replica,iof-svc,ns-proxy,oob-tcp,pls-rsh,ras-dash_host,ras-hostfile,ras-localhost,rds-hostfile,rds-resfile,rmaps-round_robin,rmgr-proxy,rmgr-urm,rml-oob,sds-env,sds-seed,sds-singleton,btl-sm,btl-self,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,filem-rsh,grpcomm-basic
enable_mca_no_build=carto-file,maffinity,paffinity,timer,errmgr,iof,odls,oob,plm,ras,rmaps,rml,routed,filem,btl-sm,btl-self,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,libnbc,vt
with_rte_support=no
enable_heterogeneous=no
enable_pty_support=no
enable_mem_debug=no

Просмотреть файл

@ -25,6 +25,4 @@ headers += \
libmpi_la_SOURCES += \
communicator/comm_init.c \
communicator/comm.c \
communicator/comm_cid.c \
communicator/comm_dyn.c \
communicator/comm_publish.c
communicator/comm_cid.c

Просмотреть файл

@ -23,7 +23,10 @@
#include <stdio.h>
#include "ompi/constants.h"
#include "orte/dss/dss.h"
#include "opal/dss/dss.h"
#include "orte/util/name_fns.h"
#include "ompi/proc/proc.h"
#include "opal/threads/mutex.h"
#include "opal/util/bit_ops.h"
@ -31,7 +34,7 @@
#include "opal/util/convert.h"
#include "ompi/mca/topo/topo.h"
#include "ompi/mca/topo/base/base.h"
#include "orte/mca/ns/ns.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/attribute/attribute.h"
#include "ompi/communicator/communicator.h"
@ -139,7 +142,7 @@ int ompi_comm_set ( ompi_communicator_t **ncomm,
/* Check how many different jobids are represented in this communicator.
Necessary for the disconnect of dynamic communicators. */
ompi_comm_mark_dyncomm (newcomm);
ompi_dpm.mark_dyncomm (newcomm);
/* Set error handler */
newcomm->error_handler = errh;
@ -1035,7 +1038,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
ompi_proc_t **rprocs=NULL;
orte_std_cntr_t size_len;
int int_len, rlen;
orte_buffer_t *sbuf=NULL, *rbuf=NULL;
opal_buffer_t *sbuf=NULL, *rbuf=NULL;
void *sendbuf;
char *recvbuf;
ompi_proc_t **proc_list=NULL;
@ -1045,7 +1048,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
local_size = ompi_comm_size (local_comm);
if (local_rank == local_leader) {
sbuf = OBJ_NEW(orte_buffer_t);
sbuf = OBJ_NEW(opal_buffer_t);
if (NULL == sbuf) {
rc = ORTE_ERROR;
goto err_exit;
@ -1065,7 +1068,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
if ( OMPI_SUCCESS != rc ) {
goto err_exit;
}
if (ORTE_SUCCESS != (rc = orte_dss.unload(sbuf, &sendbuf, &size_len))) {
if (ORTE_SUCCESS != (rc = opal_dss.unload(sbuf, &sendbuf, &size_len))) {
goto err_exit;
}
@ -1131,13 +1134,13 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
goto err_exit;
}
rbuf = OBJ_NEW(orte_buffer_t);
rbuf = OBJ_NEW(opal_buffer_t);
if (NULL == rbuf) {
rc = ORTE_ERROR;
goto err_exit;
}
if (ORTE_SUCCESS != (rc = orte_dss.load(rbuf, recvbuf, rlen))) {
if (ORTE_SUCCESS != (rc = opal_dss.load(rbuf, recvbuf, rlen))) {
goto err_exit;
}
@ -1250,7 +1253,7 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high )
theirproc = ompi_group_peer_lookup(intercomm->c_remote_group,0);
mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID;
rc = orte_ns.compare_fields(mask, &(ourproc->proc_name), &(theirproc->proc_name));
rc = orte_util_compare_name_fields(mask, &(ourproc->proc_name), &(theirproc->proc_name));
if ( 0 > rc ) {
flag = true;
}
@ -1611,7 +1614,7 @@ static int ompi_comm_fill_rest (ompi_communicator_t *comm,
/* verify whether to set the flag, that this comm
contains process from more than one jobid. */
ompi_comm_mark_dyncomm (comm);
ompi_dpm.mark_dyncomm (comm);
/* set the error handler */
comm->error_handler = errh;

Просмотреть файл

@ -22,9 +22,9 @@
#include "ompi_config.h"
#include "orte/dss/dss.h"
#include "opal/dss/dss.h"
#include "opal/util/convert.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/types.h"
#include "ompi/communicator/communicator.h"
#include "ompi/proc/proc.h"
#include "ompi/constants.h"
@ -35,10 +35,10 @@
#include "orte/mca/rml/rml.h"
#include "ompi/request/request.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/mca/dpm/dpm.h"
BEGIN_C_DECLS
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* These functions make sure, that we determine the global result over
* an intra communicators (simple), an inter-communicator and a
@ -774,26 +774,26 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf,
}
if (local_rank == local_leader ) {
orte_buffer_t *sbuf;
orte_buffer_t *rbuf;
opal_buffer_t *sbuf;
opal_buffer_t *rbuf;
sbuf = OBJ_NEW(orte_buffer_t);
rbuf = OBJ_NEW(orte_buffer_t);
sbuf = OBJ_NEW(opal_buffer_t);
rbuf = OBJ_NEW(opal_buffer_t);
if (ORTE_SUCCESS != (rc = orte_dss.pack(sbuf, tmpbuf, (orte_std_cntr_t)count, ORTE_INT))) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(sbuf, tmpbuf, (orte_std_cntr_t)count, OPAL_INT))) {
goto exit;
}
if ( send_first ) {
rc = orte_rml.send_buffer(remote_leader, sbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0);
rc = orte_rml.recv_buffer(remote_leader, rbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0);
rc = orte_rml.send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0);
rc = orte_rml.recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0);
}
else {
rc = orte_rml.recv_buffer(remote_leader, rbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0);
rc = orte_rml.send_buffer(remote_leader, sbuf, ORTE_RML_TAG_COMM_CID_INTRA, 0);
rc = orte_rml.recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0);
rc = orte_rml.send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0);
}
if (ORTE_SUCCESS != (rc = orte_dss.unpack(rbuf, outbuf, &size_count, ORTE_INT))) {
if (ORTE_SUCCESS != (rc = opal_dss.unpack(rbuf, outbuf, &size_count, OPAL_INT))) {
goto exit;
}
OBJ_RELEASE(sbuf);
@ -834,6 +834,5 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf,
return (rc);
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
END_C_DECLS

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -28,11 +28,11 @@
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/topo/base/base.h"
#include "orte/mca/ns/base/base.h"
#include "ompi/runtime/params.h"
#include "ompi/communicator/communicator.h"
#include "ompi/attribute/attribute.h"
#include "ompi/mca/topo/topo.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/memchecker.h"
/*
@ -210,7 +210,7 @@ int ompi_comm_finalize(void)
OBJ_DESTRUCT( &ompi_mpi_comm_self );
/* disconnect all dynamic communicators */
ompi_comm_dyn_finalize();
ompi_dpm.dyn_finalize();
/* Shut down MPI_COMM_WORLD */
OBJ_DESTRUCT( &ompi_mpi_comm_world );

Просмотреть файл

@ -1,166 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <stdio.h>
#include "ompi/communicator/communicator.h"
#include "ompi/proc/proc.h"
#include "ompi/constants.h"
#include "ompi/mca/pml/pml.h"
#include "orte/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/rml/rml_types.h"
#define OMPI_COMM_PORT_KEY "ompi-port-name"
/**
 * Build a port name for the calling process.
 *
 * The port name has the form "<process-name-string>:<tag>": the string
 * form of this process's name followed by a freshly assigned RML tag.
 * The tag is appended so that the port stays unique when one process
 * opens more than one port (e.g. multi-threaded scenarios).
 *
 * @param port_name  Caller-provided output buffer that receives the
 *                   NUL-terminated port name.
 *                   NOTE(review): the write is an unbounded sprintf;
 *                   presumably callers pass MPI_MAX_PORT_NAME bytes --
 *                   confirm and bound the write accordingly.
 *
 * @return OMPI_SUCCESS on success; otherwise the error code returned by
 *         the name service call that failed.  The temporary process
 *         list and name string are released on every path.
 */
int ompi_open_port(char *port_name)
{
    ompi_proc_t **myproc = NULL;
    char *name = NULL;
    size_t size = 0;
    orte_rml_tag_t lport_id = 0;
    int rc;

    myproc = ompi_proc_self (&size);

    rc = orte_ns.get_proc_name_string (&name, &(myproc[0]->proc_name));
    if (ORTE_SUCCESS != rc) {
        /* name was not produced, so only the process list needs freeing */
        free ( myproc );
        return rc;
    }

    rc = orte_ns.assign_rml_tag(&lport_id, NULL);
    if (ORTE_SUCCESS == rc) {
        sprintf (port_name, "%s:%d", name, lport_id);
        rc = OMPI_SUCCESS;
    }

    /* common exit: previously both buffers leaked when tag assignment failed */
    free ( myproc );
    free ( name );
    return rc;
}
/**
 * Split a port name of the form "<process-name>:<tag>" into its parts.
 *
 * @param port_name  NUL-terminated port name.  Only the first
 *                   MPI_MAX_PORT_NAME - 1 characters are considered.
 * @param tag        Receives the integer parsed from the tag field.  If
 *                   the field is present but does not start with a
 *                   decimal integer, *tag is left unchanged.
 *
 * @return A newly malloc'ed, NUL-terminated copy of the process-name
 *         part (the caller must free it), or NULL if memory could not
 *         be allocated or the port name has no process-name or no tag
 *         field.
 */
char *ompi_parse_port (char *port_name, orte_rml_tag_t *tag)
{
    char tmp_port[MPI_MAX_PORT_NAME];
    char *proc_part;
    char *tag_part;
    char *tmp_string;
    int tag_value;

    /* strtok modifies its input, so tokenize a private copy; snprintf
       (unlike strncpy) always leaves that copy NUL-terminated */
    snprintf (tmp_port, sizeof(tmp_port), "%s", port_name);

    proc_part = strtok(tmp_port, ":");
    if (NULL == proc_part) {
        return NULL;
    }
    tag_part = strtok(NULL, ":");
    if (NULL == tag_part) {
        return NULL;
    }

    tmp_string = (char *) malloc (MPI_MAX_PORT_NAME);
    if (NULL == tmp_string ) {
        return NULL;
    }
    /* proc_part lies inside tmp_port, so it always fits */
    snprintf (tmp_string, MPI_MAX_PORT_NAME, "%s", proc_part);

    /* parse into a real int instead of writing through a casted pointer */
    if (1 == sscanf(tag_part, "%d", &tag_value)) {
        *tag = (orte_rml_tag_t) tag_value;
    }
    return tmp_string;
}
/*
 * Publish port_name in the registry, using service_name as the token
 * under which it is stored.  Jobid and vpid are meant to be used later
 * to make sure that only the publishing process can unpublish the
 * information.
 *
 * Returns ORTE_SUCCESS, or the error code of the registry call that
 * failed (which is also logged).
 */
int ompi_comm_namepublish ( char *service_name, char *port_name )
{
    orte_gpr_value_t *entry;
    int status;

    status = orte_gpr.create_value(&entry, ORTE_GPR_TOKENS_AND | ORTE_GPR_OVERWRITE,
                                   OMPI_NAMESPACE_SEGMENT, 1, 1);
    if (ORTE_SUCCESS != status) {
        /* nothing was created, so there is nothing to release */
        ORTE_ERROR_LOG(status);
        return status;
    }

    entry->tokens[0] = strdup(service_name);

    status = orte_gpr.create_keyval(&(entry->keyvals[0]), OMPI_COMM_PORT_KEY,
                                    ORTE_STRING, port_name);
    if (ORTE_SUCCESS == status) {
        /* only store the entry once its key/value pair is in place */
        status = orte_gpr.put(1, &entry);
    }
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    /* the local reference is dropped whether or not the put succeeded */
    OBJ_RELEASE(entry);
    return status;
}
/**
 * Look up the port name published under service_name.
 *
 * Queries the registry for the port-name key stored under the token
 * service_name and returns a copy of the first value found.
 *
 * @param service_name  Token the port name was published under.
 *
 * @return A newly strdup'ed port name (the caller must free it), or
 *         NULL if the lookup key could not be allocated, the registry
 *         query failed, or no entry was found.
 *
 * NOTE(review): only values[0] is released; whether the values array
 * itself (or further entries, should cnt exceed 1) must also be
 * released is not visible here -- confirm against the registry API.
 */
char* ompi_comm_namelookup ( char *service_name )
{
    char *token[2], *key[2];
    orte_gpr_keyval_t **keyvals=NULL;
    orte_gpr_value_t **values;
    orte_std_cntr_t cnt=0;
    char *stmp=NULL;
    int ret;

    token[0] = service_name;
    token[1] = NULL;

    key[0] = strdup(OMPI_COMM_PORT_KEY);
    key[1] = NULL;
    if (NULL == key[0]) {
        return NULL;
    }

    ret = orte_gpr.get(ORTE_GPR_TOKENS_AND, OMPI_NAMESPACE_SEGMENT,
                       token, key, &cnt, &values);

    /* the key copy is only needed for the query; it used to be leaked */
    free(key[0]);

    if (ORTE_SUCCESS != ret) {
        return NULL;
    }

    if ( 0 < cnt && NULL != values[0] ) { /* should be only one, if any */
        keyvals = values[0]->keyvals;
        stmp = strdup((const char*)keyvals[0]->value->data);
        OBJ_RELEASE(values[0]);
    }
    return (stmp);
}
/*
 * Remove a published service entry.
 *
 * Only the process that published service_name has the right to remove
 * it; this is to be enforced later by adding jobid and vpid as tokens.
 * The registry deletion is currently compiled out, so the function does
 * nothing and always reports OMPI_SUCCESS.
 */
int ompi_comm_nameunpublish ( char *service_name )
{
#if 0
    char *token[2];

    token[0] = service_name;
    token[1] = NULL;
    return orte_gpr.delete_entries(ORTE_GPR_TOKENS_AND,
                                   OMPI_NAMESPACE_SEGMENT,
                                   token, NULL);
#else
    (void) service_name;
    return OMPI_SUCCESS;
#endif
}

Просмотреть файл

@ -65,7 +65,6 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t);
/* a set of special tags: */
/* to recognize an MPI_Comm_join in the comm_connect_accept routine. */
#define OMPI_COMM_JOIN_TAG -32000
#define OMPI_COMM_ALLGATHER_TAG -31078
#define OMPI_COMM_BARRIER_TAG -31079
@ -356,7 +355,7 @@ struct ompi_communicator_t {
* the OOB version.
* This routine has to be thread safe in the final version.
*/
int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
OMPI_DECLSPEC int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
ompi_communicator_t* oldcomm,
ompi_communicator_t* bridgecomm,
void* local_leader,
@ -373,7 +372,7 @@ struct ompi_communicator_t {
* This is THE routine, where all the communicator stuff
* is really set.
*/
int ompi_comm_set ( ompi_communicator_t** newcomm,
OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm,
ompi_communicator_t* oldcomm,
int local_size,
int *local_ranks,
@ -412,7 +411,7 @@ struct ompi_communicator_t {
int high );
int ompi_comm_activate ( ompi_communicator_t* newcomm,
OMPI_DECLSPEC int ompi_comm_activate ( ompi_communicator_t* newcomm,
ompi_communicator_t* oldcomm,
ompi_communicator_t* bridgecomm,
void* local_leader,
@ -427,35 +426,9 @@ struct ompi_communicator_t {
*/
int ompi_comm_dump ( ompi_communicator_t *comm );
/**
* a simple function to determine a port number
*/
int ompi_open_port (char *port_name);
/**
* takes a port_name and returns the oob-contact information
* and the tag
*/
char * ompi_parse_port (char *port_name, orte_rml_tag_t *tag) ;
/**
* routines handling name publishing, lookup and unpublishing
*/
int ompi_comm_namepublish ( char *service_name, char *port_name );
char* ompi_comm_namelookup ( char *service_name );
int ompi_comm_nameunpublish ( char *service_name );
/* setting name */
int ompi_comm_set_name (ompi_communicator_t *comm, char *name );
/* THE routine for dynamic process management. This routine
sets the connection up between two independent applications.
*/
int ompi_comm_connect_accept ( ompi_communicator_t *comm, int root,
orte_process_name_t *port, int send_first,
ompi_communicator_t **newcomm, orte_rml_tag_t tag);
/*
* these are the init and finalize functions for the comm_reg
* stuff. These routines are necessary for handling multi-threading
@ -464,59 +437,9 @@ struct ompi_communicator_t {
void ompi_comm_reg_init(void);
void ompi_comm_reg_finalize(void);
/* start the new processes from MPI_Comm_spawn_multiple. Initial
* version, very rough
*/
int ompi_comm_start_processes(int count, char **array_of_commands,
char ***array_of_argv,
int *array_of_maxprocs,
MPI_Info *array_of_info,
char *port_name);
/*
* This routine checks, whether an application has been spawned
* by another MPI application, or has been independently started.
* If it has been spawned, it establishes the parent communicator.
* Since the routine has to communicate, it should be among the last
* steps in MPI_Init, to be sure that everything is already set up.
*/
int ompi_comm_dyn_init(void);
/**
* Executes internally a disconnect on all dynamic communicators
* in case the user did not disconnect them.
*/
int ompi_comm_dyn_finalize(void);
/* this routine counts the number of different jobids of the processes
given in a certain communicator. If there is more than one jobid,
we mark the communicator as 'dynamic'. This is especially relevant
for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have
to wait for all still connected processes. */
/* global variable to save the number of dynamic communicators */
extern int ompi_comm_num_dyncomm;
void ompi_comm_mark_dyncomm (ompi_communicator_t *comm);
/* the next two routines implement a kind of non-blocking barrier.
the only difference is, that you can wait for the completion
of more than one initiated ibarrier. This is required for waiting
for all still connected processes in MPI_Finalize.
ompi_comm_disconnect_init returns a handle, which has to be passed in
to ompi_comm_disconnect_waitall. The second routine blocks, until
all non-blocking barriers described by the handles are finished.
The communicators can then be released.
*/
struct ompi_comm_disconnect_obj {
ompi_communicator_t *comm;
int size;
struct ompi_request_t **reqs;
int buf;
};
typedef struct ompi_comm_disconnect_obj ompi_comm_disconnect_obj;
ompi_comm_disconnect_obj *ompi_comm_disconnect_init (ompi_communicator_t *comm);
void ompi_comm_disconnect_waitall (int count, ompi_comm_disconnect_obj **objs );
END_C_DECLS

Просмотреть файл

@ -24,6 +24,7 @@
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#include <stdio.h>
#include "opal/prefetch.h"

Просмотреть файл

@ -24,6 +24,8 @@
#include "ompi/datatype/convertor_internal.h"
#include "ompi/datatype/datatype_internal.h"
#include <stdio.h>
#if OMPI_ENABLE_DEBUG
extern int ompi_unpack_debug;
#define DO_DEBUG(INST) if( ompi_unpack_debug ) { INST }

Просмотреть файл

@ -25,6 +25,7 @@
#include "ompi/datatype/datatype.h"
#include "ompi/datatype/datatype_internal.h"
#include "ompi/datatype/convertor_internal.h"
#include <stdio.h>
#if OMPI_ENABLE_DEBUG
#include "opal/mca/base/mca_base_param.h"

Просмотреть файл

@ -136,7 +136,7 @@ OMPI_DECLSPEC extern ompi_group_t ompi_mpi_group_null;
*
* @return Pointer to new group structure
*/
ompi_group_t *ompi_group_allocate(int group_size);
OMPI_DECLSPEC ompi_group_t *ompi_group_allocate(int group_size);
ompi_group_t *ompi_group_allocate_sporadic(int group_size);
ompi_group_t *ompi_group_allocate_strided(void);
ompi_group_t *ompi_group_allocate_bmap(int orig_group_size, int group_size);

Просмотреть файл

@ -286,7 +286,7 @@ typedef int (MPI_Grequest_cancel_function)(void *, int);
#define MPI_ARGV_NULL ((char **) 0) /* NULL argument vector */
#define MPI_ARGVS_NULL ((char ***) 0) /* NULL argument vectors */
#define MPI_ERRCODES_IGNORE ((int *) 0) /* don't return error codes */
#define MPI_MAX_PORT_NAME 36 /* max port name length */
#define MPI_MAX_PORT_NAME 256 /* max port name length */
#define MPI_MAX_NAME_LEN MPI_MAX_PORT_NAME /* max port name length */
#define MPI_ORDER_C 0 /* C row major order */
#define MPI_ORDER_FORTRAN 1 /* Fortran column major order */

Просмотреть файл

@ -133,7 +133,7 @@
parameter (MPI_BSEND_OVERHEAD=128)
parameter (MPI_MAX_INFO_KEY=35)
parameter (MPI_MAX_INFO_VAL=255)
parameter (MPI_MAX_PORT_NAME=35)
parameter (MPI_MAX_PORT_NAME=255)
parameter (MPI_MAX_OBJECT_NAME=63)
parameter (MPI_ORDER_C=0)
parameter (MPI_ORDER_FORTRAN=1)

Просмотреть файл

@ -19,7 +19,7 @@
#ifndef OMPI_CONSTANTS_H
#define OMPI_CONSTANTS_H
#include "orte/orte_constants.h"
#include "orte/constants.h"
#define OMPI_ERR_BASE ORTE_ERR_MAX

Просмотреть файл

@ -10,6 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -27,6 +28,7 @@
#include <stdlib.h>
#endif
#include <limits.h>
#include <ctype.h>
#include "ompi/constants.h"
#include "ompi/info/info.h"
@ -207,6 +209,46 @@ int ompi_info_get (ompi_info_t *info, char *key, int valuelen,
}
/*
 * Similar to ompi_info_get(), but cast the result into a boolean
 * using some well-defined rules.
 *
 * info   info object to query
 * key    null-terminated key to look up
 * value  output; written only when the key is found (*flag != 0)
 * flag   output of ompi_info_get(): non-zero if the key was found
 *
 * When the key is found, surrounding whitespace is ignored and the
 * remaining text is interpreted as follows:
 *   - starts with a digit          -> (bool) atoi(text)
 *   - "yes" / "true" (any case)    -> true
 *   - "no" / "false" (any case)    -> false
 *   - empty or anything else       -> false
 *
 * Always returns MPI_SUCCESS.
 */
int ompi_info_get_bool(ompi_info_t *info, char *key, bool *value, int *flag)
{
    char str[256];
    char *ptr;
    size_t len;

    /* Guarantee a terminated buffer even if the lookup writes nothing
       or fills every byte it is allowed to. */
    str[0] = '\0';
    str[sizeof(str) - 1] = '\0';

    ompi_info_get(info, key, sizeof(str) - 1, str, flag);
    if (!*flag) {
        return MPI_SUCCESS;
    }

    *value = false;

    /* Trim trailing whitespace.  Start from the end of the string
       itself (not the end of the array, which only ever holds the
       terminator) and work with an index so we never form a pointer
       before the start of the buffer. */
    len = strlen(str);
    while (len > 0 && isspace((unsigned char) str[len - 1])) {
        str[--len] = '\0';
    }

    /* Skip leading whitespace; the terminator is not whitespace, so
       this stops at the end of the string at the latest.  The casts to
       unsigned char keep the <ctype.h> calls defined for bytes with the
       high bit set. */
    ptr = str;
    while (isspace((unsigned char) *ptr)) {
        ++ptr;
    }

    if ('\0' == *ptr) {
        /* Empty (or all-whitespace) value: leave *value as false */
        return MPI_SUCCESS;
    }

    if (isdigit((unsigned char) *ptr)) {
        *value = (bool) atoi(ptr);
    } else if (0 == strcasecmp(ptr, "yes") ||
               0 == strcasecmp(ptr, "true")) {
        *value = true;
    }
    /* "no", "false" and any unrecognized text all leave *value as
       false.  RHC unrecognized value -- print a warning? */

    return MPI_SUCCESS;
}
/*
* Delete a key from an info
*/

Просмотреть файл

@ -10,6 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -148,6 +149,32 @@ int ompi_info_set (ompi_info_t *info, char *key, char *value);
*/
int ompi_info_free (ompi_info_t **info);
/**
* Get a (key, value) pair from an 'MPI_Info' object and assign it
* into a boolean output.
*
* @param info Pointer to ompi_info_t object
* @param key null-terminated character string of the index key
* @param value Boolean output value
* @param flag true (1) if 'key' defined on 'info', false (0) if not
* (logical)
*
* @retval MPI_SUCCESS
*
* If found, the string value will be cast to the boolean output in
* the following manner:
*
* - If the string value is digits, the return value is "(bool)
* atoi(value)"
* - If the string value is (case-insensitive) "yes" or "true", the
* result is true
* - If the string value is (case-insensitive) "no" or "false", the
* result is false
* - All other values are false
*/
OMPI_DECLSPEC int ompi_info_get_bool (ompi_info_t *info, char *key, bool *value,
int *flag);
/**
* Get a (key, value) pair from an 'MPI_Info' object
*
@ -163,8 +190,8 @@ int ompi_info_free (ompi_info_t **info);
* In C and C++, 'valuelen' should be one less than the allocated
* space to allow for the null terminator.
*/
int ompi_info_get (ompi_info_t *info, char *key, int valuelen,
char *value, int *flag);
OMPI_DECLSPEC int ompi_info_get (ompi_info_t *info, char *key, int valuelen,
char *value, int *flag);
/**
* Delete a (key,value) pair from "info"

Просмотреть файл

@ -24,7 +24,6 @@
#include <string.h>
#include "opal/util/show_help.h"
#include "orte/mca/ns/ns.h"
#include "ompi/class/ompi_bitmap.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/bml/base/base.h"
@ -34,6 +33,7 @@
#include "ompi/mca/bml/base/bml_base_btl.h"
#include "bml_r2.h"
#include "orte/class/orte_proc_table.h"
#include "orte/util/name_fns.h"
#include "ompi/proc/proc.h"
extern mca_bml_base_component_t mca_bml_r2_component;
@ -450,9 +450,9 @@ int mca_bml_r2_add_procs(
OMPI_ERR_UNREACH == ret) {
char *local, *remote;
orte_ns.get_proc_name_string(&local,
orte_util_convert_process_name_to_string(&local,
&(ompi_proc_local_proc->proc_name));
orte_ns.get_proc_name_string(&remote,
orte_util_convert_process_name_to_string(&remote,
&(unreach_proc->proc_name));
opal_show_help("help-mca-bml-r2",

Просмотреть файл

@ -24,7 +24,6 @@
#include <string.h>
#include "opal/util/show_help.h"
#include "orte/mca/ns/ns.h"
#include "ompi/runtime/ompi_cr.h"
#include "ompi/class/ompi_bitmap.h"
#include "ompi/mca/bml/bml.h"
@ -35,9 +34,7 @@
#include "ompi/mca/bml/base/bml_base_btl.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/class/orte_proc_table.h"
#include "ompi/proc/proc.h"
@ -117,9 +114,11 @@ int mca_bml_r2_ft_event(int state) {
if( NULL != mca_bml_r2.btl_modules) {
free( mca_bml_r2.btl_modules);
mca_bml_r2.btl_modules = NULL;
}
if( NULL != mca_bml_r2.btl_progress ) {
free( mca_bml_r2.btl_progress);
mca_bml_r2.btl_progress = NULL;
}
opal_output_verbose(10, ompi_cr_output,
@ -163,8 +162,10 @@ int mca_bml_r2_ft_event(int state) {
mca_bml_r2.btls_added = false;
for(p = 0; p < (int)num_procs; ++p) {
OBJ_RELEASE(procs[p]->proc_bml);
procs[p]->proc_bml = NULL;
if( NULL != procs[p]->proc_bml) {
OBJ_RELEASE(procs[p]->proc_bml);
procs[p]->proc_bml = NULL;
}
OBJ_RELEASE(procs[p]);
}

Просмотреть файл

@ -25,8 +25,11 @@
#include "base.h"
#include "btl_base_error.h"
#include "opal/util/show_help.h"
#include "orte/util/sys_info.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/types.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
int mca_btl_base_verbose;
@ -60,7 +63,7 @@ void mca_btl_base_error_no_nics(const char* transport,
char *procid;
if (mca_btl_base_warn_component_unused) {
/* print out no-nic warning if user told us to */
asprintf(&procid, "%s", ORTE_NAME_PRINT(orte_process_info.my_name));
asprintf(&procid, "%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
opal_show_help("help-mpi-btl-base.txt", "btl:no-nics",
true, procid, transport, orte_system_info.nodename,

Просмотреть файл

@ -28,7 +28,8 @@
#include "orte/util/proc_info.h"
#include "orte/util/sys_info.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
OMPI_DECLSPEC extern int mca_btl_base_verbose;
@ -39,7 +40,7 @@ extern int mca_btl_base_out(const char*, ...);
do { \
mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
orte_system_info.nodename, \
ORTE_NAME_PRINT(orte_process_info.my_name), \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_out args; \
mca_btl_base_out("\n"); \
@ -50,7 +51,7 @@ do { \
do { \
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
orte_system_info.nodename, \
ORTE_NAME_PRINT(orte_process_info.my_name), \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
@ -59,7 +60,7 @@ do { \
#define BTL_PEER_ERROR(proc, args) \
do { \
mca_btl_base_err("%s[%s:%d:%s] from %s ", \
ORTE_NAME_PRINT(orte_process_info.my_name), \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, __func__, \
orte_system_info.nodename); \
if(proc && proc->proc_hostname) { \
@ -76,7 +77,7 @@ do { \
if(mca_btl_base_verbose > 0) { \
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
orte_system_info.nodename, \
ORTE_NAME_PRINT(orte_process_info.my_name), \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
@ -89,13 +90,9 @@ do { \
#endif
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
OMPI_DECLSPEC extern void mca_btl_base_error_no_nics(const char* transport,
const char* nic_name);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
END_C_DECLS

Просмотреть файл

@ -154,7 +154,7 @@ int mca_btl_base_select(bool enable_progress_threads,
if (0 == opal_list_get_size(&mca_btl_base_modules_initialized)) {
opal_show_help("help-mca-base.txt", "find-available:none-found", true,
"btl");
orte_errmgr.error_detected(1, NULL);
orte_errmgr.abort(1, NULL);
}
return OMPI_SUCCESS;
}

Просмотреть файл

@ -23,6 +23,9 @@
#include "ompi/runtime/ompi_module_exchange.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/mca/mpool/base/base.h"
#include "btl_elan.h"
#include "btl_elan_frag.h"
#include "btl_elan_endpoint.h"
@ -225,6 +228,8 @@ mca_btl_elan_component_init( int *num_btl_modules,
mca_btl_elan_component.elan_free_list_inc,
NULL ); /* use default allocator */
vpid = ORTE_PROC_MY_NAME->vpid;
ompi_modex_send( &mca_btl_elan_component.super.btl_version, &vpid,
sizeof(vpid));

Просмотреть файл

@ -14,11 +14,10 @@
#include <sys/time.h>
#include <time.h>
#include "ompi/types.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "opal/dss/dss.h"
#include "btl_elan.h"
#include "btl_elan_endpoint.h"
#include "btl_elan_proc.h"

Просмотреть файл

@ -12,7 +12,6 @@
#ifndef MCA_BTL_ELAN_PROC_H
#define MCA_BTL_ELAN_PROC_H
#include "orte/mca/ns/ns.h"
#include "opal/class/opal_object.h"
#include "ompi/proc/proc.h"
#include "btl_elan.h"

Просмотреть файл

@ -44,6 +44,8 @@
#include "ompi/datatype/convertor.h"
#include "btl_gm_endpoint.h"
#include "orte/util/proc_info.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "ompi/runtime/ompi_module_exchange.h"
@ -440,7 +442,7 @@ static int mca_btl_gm_discover( void )
"%s gm_port %08lX, "
"board %" PRIu32 ", global %" PRIu32 " "
"node %" PRIu32 "port %" PRIu32 "\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(unsigned long) port, board_no, global_id, node_id, port_no);
}

Просмотреть файл

@ -21,11 +21,6 @@
#include <sys/time.h>
#include <time.h>
#include "ompi/types.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "btl_gm.h"
#include "btl_gm_endpoint.h"
#include "btl_gm_proc.h"

Просмотреть файл

@ -25,6 +25,9 @@
#include <inttypes.h>
#endif
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "btl_gm.h"
#include "btl_gm_proc.h"
@ -191,7 +194,7 @@ int mca_btl_gm_proc_insert(
if(mca_btl_gm_component.gm_debug > 0) {
opal_output(0, "%s mapped global id %" PRIu32
" to node id %" PRIu32 "\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
gm_endpoint->endpoint_addr.global_id,
gm_endpoint->endpoint_addr.node_id);
}

Просмотреть файл

@ -19,7 +19,6 @@
#ifndef MCA_BTL_GM_PROC_H
#define MCA_BTL_GM_PROC_H
#include "orte/mca/ns/ns.h"
#include "opal/class/opal_object.h"
#include "ompi/proc/proc.h"
#include "btl_gm.h"

Просмотреть файл

@ -21,10 +21,6 @@
#include <sys/time.h>
#include <time.h>
#include "ompi/types.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "btl_mx.h"
#include "btl_mx_endpoint.h"
#include "btl_mx_proc.h"

Просмотреть файл

@ -19,6 +19,7 @@
#include "ompi_config.h"
#include "opal/class/opal_hash_table.h"
#include "orte/util/name_fns.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "btl_mx.h"

Просмотреть файл

@ -19,7 +19,6 @@
#ifndef MCA_BTL_MX_PROC_H
#define MCA_BTL_MX_PROC_H
#include "orte/mca/ns/ns.h"
#include "opal/class/opal_object.h"
#include "ompi/proc/proc.h"
#include "btl_mx.h"

Просмотреть файл

@ -34,6 +34,8 @@
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "orte/runtime/orte_globals.h"
#include "btl_ofud.h"
#include "btl_ofud_frag.h"
#include "btl_ofud_endpoint.h"
@ -280,7 +282,7 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
*num_btl_modules = 0;
num_devs = 0;
seedv[0] = orte_process_info.my_name->vpid;
seedv[0] = ORTE_PROC_MY_NAME->vpid;
seedv[1] = opal_sys_timer_get_cycles();
seedv[2] = opal_sys_timer_get_cycles();
seed48(seedv);

Просмотреть файл

@ -22,7 +22,6 @@
#define MCA_BTL_UD_PROC_H
#include "opal/class/opal_object.h"
#include "orte/mca/ns/ns.h"
#include "ompi/proc/proc.h"
#include "btl_ofud.h"

Просмотреть файл

@ -57,6 +57,7 @@
#ifdef HAVE_SYS_RESOURCE_H
#include <sys/resource.h>
#endif
#include <unistd.h>
mca_btl_openib_module_t mca_btl_openib_module = {
{

Просмотреть файл

@ -46,6 +46,7 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/sys_info.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/pml/pml.h"
@ -1315,7 +1316,7 @@ btl_openib_component_init(int *num_btl_modules,
*num_btl_modules = 0;
num_devs = 0;
seedv[0] = orte_process_info.my_name->vpid;
seedv[0] = ORTE_PROC_MY_NAME->vpid;
seedv[1] = opal_sys_timer_get_cycles();
seedv[2] = opal_sys_timer_get_cycles();
seed48(seedv);

Просмотреть файл

@ -30,11 +30,9 @@
#include <errno.h>
#include <string.h>
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "ompi/types.h"
#include "ompi/mca/pml/base/pml_base_sendreq.h"

Просмотреть файл

@ -20,7 +20,6 @@
#ifndef MCA_BTL_IB_PROC_H
#define MCA_BTL_IB_PROC_H
#include "orte/mca/ns/ns.h"
#include "opal/class/opal_object.h"
#include "ompi/proc/proc.h"
#include "btl_openib.h"

Просмотреть файл

@ -22,11 +22,14 @@
#include "ompi_config.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "opal/dss/dss.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/mca/dpm/dpm.h"
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
@ -59,10 +62,10 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
uint8_t message_type);
static void rml_send_cb(int status, orte_process_name_t* endpoint,
orte_buffer_t* buffer, orte_rml_tag_t tag,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
static void rml_recv_cb(int status, orte_process_name_t* process_name,
orte_buffer_t* buffer, orte_rml_tag_t tag,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
/*
@ -107,7 +110,7 @@ static int oob_init(void)
int rc;
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_OPENIB,
OMPI_RML_TAG_OPENIB,
ORTE_RML_PERSISTENT,
rml_recv_cb,
NULL);
@ -158,7 +161,7 @@ static int oob_query(mca_btl_openib_hca_t *hca)
*/
static int oob_finalize(void)
{
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_OPENIB);
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
return OMPI_SUCCESS;
}
@ -416,7 +419,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
uint8_t message_type)
{
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
opal_buffer_t* buffer = OBJ_NEW(opal_buffer_t);
int rc;
if (NULL == buffer) {
@ -425,15 +428,15 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
}
/* pack the info in the send buffer */
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT8));
rc = orte_dss.pack(buffer, &message_type, 1, ORTE_UINT8);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8));
rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT64));
rc = orte_dss.pack(buffer, &endpoint->subnet_id, 1, ORTE_UINT64);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64));
rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -441,16 +444,16 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
if (message_type != ENDPOINT_CONNECT_REQUEST) {
/* send the QP connect request info we respond to */
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer,
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer,
&endpoint->rem_info.rem_qps[0].rem_qp_num, 1,
ORTE_UINT32);
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16));
rc = orte_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, ORTE_UINT16);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
rc = opal_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -461,37 +464,37 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
int qp;
/* stuff all the QP info into the buffer */
for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num,
1, ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num,
1, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1,
ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1,
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16));
rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, ORTE_UINT16);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1,
ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1,
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &endpoint->index, 1, ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -500,7 +503,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
/* send to remote endpoint */
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid,
buffer, ORTE_RML_TAG_OPENIB, 0,
buffer, OMPI_RML_TAG_OPENIB, 0,
rml_send_cb, NULL);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -519,7 +522,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
* remote peer
*/
static void rml_send_cb(int status, orte_process_name_t* endpoint,
orte_buffer_t* buffer, orte_rml_tag_t tag,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
OBJ_RELEASE(buffer);
@ -532,7 +535,7 @@ static void rml_send_cb(int status, orte_process_name_t* endpoint,
* otherwise try to modify QP's and establish reliable connection
*/
static void rml_recv_cb(int status, orte_process_name_t* process_name,
orte_buffer_t* buffer, orte_rml_tag_t tag,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
mca_btl_openib_proc_t *ib_proc;
@ -548,29 +551,29 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
/* start by unpacking data first so we know who is knocking at
our door */
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT8));
rc = orte_dss.unpack(buffer, &message_type, &cnt, ORTE_UINT8);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8));
rc = opal_dss.unpack(buffer, &message_type, &cnt, OPAL_UINT8);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
}
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT64));
rc = orte_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, ORTE_UINT64);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64));
rc = opal_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, OPAL_UINT64);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
}
if (ENDPOINT_CONNECT_REQUEST != message_type) {
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &lcl_qp, &cnt, ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &lcl_qp, &cnt, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
}
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16));
rc = orte_dss.unpack(buffer, &lcl_lid, &cnt, ORTE_UINT16);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
rc = opal_dss.unpack(buffer, &lcl_lid, &cnt, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
@ -585,36 +588,36 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
/* unpack all the qp info */
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt,
ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt,
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
}
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_psn, &cnt,
ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_psn, &cnt,
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
}
}
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16));
rc = orte_dss.unpack(buffer, &rem_info.rem_lid, &cnt, ORTE_UINT16);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
rc = opal_dss.unpack(buffer, &rem_info.rem_lid, &cnt, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
}
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
}
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &rem_info.rem_index, &cnt, ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &rem_info.rem_index, &cnt, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
@ -625,7 +628,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
rem_info.rem_lid,
rem_info.rem_subnet_id));
master = orte_ns.compare_fields(ORTE_NS_CMP_ALL, orte_process_info.my_name,
master = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME,
process_name) > 0 ? true : false;
for (ib_proc = (mca_btl_openib_proc_t*)
@ -635,8 +638,8 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
bool found = false;
if (orte_ns.compare_fields(ORTE_NS_CMP_ALL,
&ib_proc->proc_guid, process_name) != ORTE_EQUAL) {
if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&ib_proc->proc_guid, process_name) != OPAL_EQUAL) {
continue;
}

Просмотреть файл

@ -10,11 +10,11 @@
#include "ompi_config.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/util/name_fns.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "opal/dss/dss.h"
#include "ompi/mca/dpm/dpm.h"
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
@ -55,8 +55,6 @@ typedef enum {
ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE /* The xrc recv qp already was destroyed */
} connect_message_type_t;
#define XOOB_TAG (ORTE_RML_TAG_DYNAMIC - 1)
#define XOOB_SET_REMOTE_INFO(EP, INFO) \
do { \
/* copy the rem_info stuff */ \
@ -79,7 +77,7 @@ static int xoob_priority = 60;
* remote peer
*/
static void xoob_rml_send_cb(int status, orte_process_name_t* endpoint,
orte_buffer_t* buffer, orte_rml_tag_t tag,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
OBJ_RELEASE(buffer);
@ -87,29 +85,29 @@ static void xoob_rml_send_cb(int status, orte_process_name_t* endpoint,
/* Receive connect information to remote endpoint */
static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *lid,
uint8_t *message_type, orte_buffer_t* buffer)
uint8_t *message_type, opal_buffer_t* buffer)
{
int cnt = 1, rc, srq;
/* Recv standard header */
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT8));
rc = orte_dss.unpack(buffer, message_type, &cnt, ORTE_UINT8);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8));
rc = opal_dss.unpack(buffer, message_type, &cnt, OPAL_UINT8);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
}
BTL_VERBOSE(("Recv unpack Message type = %d", *message_type));
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT64));
rc = orte_dss.unpack(buffer, &info->rem_subnet_id, &cnt, ORTE_UINT64);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64));
rc = opal_dss.unpack(buffer, &info->rem_subnet_id, &cnt, OPAL_UINT64);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
}
BTL_VERBOSE(("Recv unpack sid = %d", info->rem_subnet_id));
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16));
rc = orte_dss.unpack(buffer, &info->rem_lid, &cnt, ORTE_UINT16);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
rc = opal_dss.unpack(buffer, &info->rem_lid, &cnt, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
@ -121,26 +119,26 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
*/
if (ENDPOINT_XOOB_CONNECT_REQUEST == *message_type ||
ENDPOINT_XOOB_CONNECT_RESPONSE == *message_type) {
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt,
ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt,
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
}
BTL_VERBOSE(("Recv unpack remote qp = %x", info->rem_qps->rem_qp_num));
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &info->rem_qps->rem_psn, &cnt,
ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &info->rem_qps->rem_psn, &cnt,
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
}
BTL_VERBOSE(("Recv unpack remote psn = %d", info->rem_qps->rem_psn));
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &info->rem_mtu, &cnt, ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &info->rem_mtu, &cnt, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
@ -151,8 +149,8 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
if (ENDPOINT_XOOB_CONNECT_REQUEST == *message_type ||
ENDPOINT_XOOB_CONNECT_XRC_REQUEST == *message_type) {
/* unpack requested lid info */
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT16));
rc = orte_dss.unpack(buffer, lid, &cnt, ORTE_UINT16);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
rc = opal_dss.unpack(buffer, lid, &cnt, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
@ -162,10 +160,10 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
/* Unpack requested recv qp number */
if (ENDPOINT_XOOB_CONNECT_XRC_REQUEST == *message_type) {
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
/* In XRC request case we will use rem_qp_num as container for requested qp number */
rc = orte_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt,
ORTE_UINT32);
rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt,
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -175,8 +173,8 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
if (ENDPOINT_XOOB_CONNECT_RESPONSE == *message_type ||
ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == *message_type) {
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &info->rem_index, &cnt, ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &info->rem_index, &cnt, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
@ -184,8 +182,8 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
BTL_VERBOSE(("Recv unpack remote index = %d", info->rem_index));
for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) {
BTL_VERBOSE(("unpacking %d of %d\n", cnt, ORTE_UINT32));
rc = orte_dss.unpack(buffer, &info->rem_srqs[srq].rem_srq_num, &cnt, ORTE_UINT32);
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &info->rem_srqs[srq].rem_srq_num, &cnt, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
@ -202,7 +200,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
uint8_t message_type)
{
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
opal_buffer_t* buffer = OBJ_NEW(opal_buffer_t);
int rc, srq;
if (NULL == buffer) {
@ -217,24 +215,24 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
*/
/* pack the info in the send buffer */
BTL_VERBOSE(("Send pack Message type = %d", message_type));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT8));
rc = orte_dss.pack(buffer, &message_type, 1, ORTE_UINT8);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8));
rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
BTL_VERBOSE(("Send pack sid = %d", endpoint->subnet_id));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT64));
rc = orte_dss.pack(buffer, &endpoint->subnet_id, 1, ORTE_UINT64);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64));
rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
BTL_VERBOSE(("Send pack lid = %d", endpoint->endpoint_btl->lid));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16));
rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, ORTE_UINT16);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -260,24 +258,24 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
/* stuff all the QP info into the buffer */
/* we need to send only one QP */
BTL_VERBOSE(("Send pack qp num = %x", qp_num));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &qp_num, 1, ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &qp_num, 1, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
BTL_VERBOSE(("Send pack lpsn = %d", psn));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &psn, 1, ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &psn, 1, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
BTL_VERBOSE(("Send pack mtu = %d", endpoint->endpoint_btl->hca->mtu));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1,
ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1,
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -293,8 +291,8 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
/* when we are sending request we add remote lid that we want to connect */
BTL_VERBOSE(("Send pack remote lid = %d", endpoint->ib_addr->lid));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT16));
rc = orte_dss.pack(buffer, &endpoint->ib_addr->lid, 1, ORTE_UINT16);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
rc = opal_dss.pack(buffer, &endpoint->ib_addr->lid, 1, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -305,9 +303,9 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
* recv qp number that we want to connect. */
if (ENDPOINT_XOOB_CONNECT_XRC_REQUEST == message_type) {
BTL_VERBOSE(("Send pack remote qp = %x", endpoint->ib_addr->remote_xrc_rcv_qp_num));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &endpoint->ib_addr->remote_xrc_rcv_qp_num,
1, ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->ib_addr->remote_xrc_rcv_qp_num,
1, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -322,8 +320,8 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == message_type) {
/* we need to send the endpoint index for immediate send */
BTL_VERBOSE(("Send pack index = %d", endpoint->index));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &endpoint->index, 1, ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -331,9 +329,9 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
/* on response we add all SRQ numbers */
for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) {
BTL_VERBOSE(("Send pack srq[%d] num = %d", srq, endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num));
BTL_VERBOSE(("packing %d of %d\n", 1, ORTE_UINT32));
rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num,
1, ORTE_UINT32);
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num,
1, OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -343,7 +341,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
/* send to remote endpoint */
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid,
buffer, XOOB_TAG, 0,
buffer, OMPI_RML_TAG_XOPENIB, 0,
xoob_rml_send_cb, NULL);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -642,8 +640,8 @@ static mca_btl_openib_endpoint_t* xoob_find_endpoint(orte_process_name_t* proces
ib_proc != (mca_btl_openib_proc_t*)
opal_list_get_end(&mca_btl_openib_component.ib_procs);
ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
if (orte_ns.compare_fields(ORTE_NS_CMP_ALL,
&ib_proc->proc_guid, process_name) == ORTE_EQUAL) {
if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&ib_proc->proc_guid, process_name) == OPAL_EQUAL) {
found = true;
break;
}
@ -753,7 +751,7 @@ static void free_rem_info(mca_btl_openib_rem_info_t *rem_info)
* otherwise try to modify QP's and establish reliable connection
*/
static void xoob_rml_recv_cb(int status, orte_process_name_t* process_name,
orte_buffer_t* buffer, orte_rml_tag_t tag,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
int rc;
@ -947,7 +945,7 @@ static int xoob_init(void)
int rc;
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
XOOB_TAG,
OMPI_RML_TAG_XOPENIB,
ORTE_RML_PERSISTENT,
xoob_rml_recv_cb,
NULL);
@ -1018,6 +1016,6 @@ static int xoob_start_connect(mca_btl_base_endpoint_t *endpoint)
*/
static int xoob_finalize(void)
{
/* NOTE(review): this text is a diff rendered without +/- markers. The two
 * recv_cancel lines below look like the removed (XOOB_TAG) and the added
 * (OMPI_RML_TAG_XOPENIB) variants of a single statement -- confirm against
 * the real file before treating both as live code. */
/* Cancel the RML receive registered for any sender (ORTE_NAME_WILDCARD)
 * on the XOOB connection tag. */
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, XOOB_TAG);
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_XOPENIB);
/* The result of recv_cancel is not inspected; OMPI_SUCCESS is always returned. */
return OMPI_SUCCESS;
}

Просмотреть файл

@ -48,7 +48,6 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ns/ns_types.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"

Просмотреть файл

@ -55,6 +55,7 @@
#include "ompi/types.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "orte/util/name_fns.h"
#include "btl_sctp.h"
#include "btl_sctp_endpoint.h"
#include "btl_sctp_proc.h"
@ -567,7 +568,6 @@ bool mca_btl_sctp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct
/* 1 to 1 */
mca_btl_sctp_addr_t* btl_addr;
mca_btl_sctp_proc_t* this_proc = mca_btl_sctp_proc_local();
orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL;
int cmpval;
OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock);
@ -576,7 +576,7 @@ bool mca_btl_sctp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct
btl_addr->addr_inet.s_addr == addr->sin_addr.s_addr)
{
mca_btl_sctp_proc_t *endpoint_proc = btl_endpoint->endpoint_proc;
cmpval = orte_ns.compare_fields(mask,
cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&endpoint_proc->proc_ompi->proc_name,
&this_proc->proc_ompi->proc_name);
if((btl_endpoint->endpoint_sd < 0) ||

Просмотреть файл

@ -20,7 +20,6 @@
#define MCA_BTL_SCTP_PROC_H
#include "opal/class/opal_object.h"
#include "orte/mca/ns/ns.h"
#include "ompi/proc/proc.h"
#include "btl_sctp.h"
#include "btl_sctp_addr.h"

Просмотреть файл

@ -43,7 +43,6 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ns/ns_types.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"

Просмотреть файл

@ -45,6 +45,8 @@
#include "opal/util/output.h"
#include "orte/util/sys_info.h"
#include "orte/util/proc_info.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/mca/pml/pml.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/runtime/ompi_module_exchange.h"
@ -263,7 +265,7 @@ mca_btl_base_module_t** mca_btl_sm_component_init(
/* create a named pipe to receive events */
sprintf( mca_btl_sm_component.sm_fifo_path,
"%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir,
(unsigned long)orte_process_info.my_name->vpid );
(unsigned long)ORTE_PROC_MY_NAME->vpid );
if(mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) {
opal_output(0, "mca_btl_sm_component_init: mkfifo failed with errno=%d\n",errno);
return NULL;

Просмотреть файл

@ -50,7 +50,7 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/types.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"

Просмотреть файл

@ -344,7 +344,7 @@ bool mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint,
return false;
}
cmpval = orte_ns.compare_fields(ORTE_NS_CMP_ALL,
cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&endpoint_proc->proc_ompi->proc_name,
&this_proc->proc_ompi->proc_name);
if((btl_endpoint->endpoint_sd < 0) ||
@ -475,7 +475,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
}
ORTE_PROCESS_NAME_NTOH(guid);
/* compare this to the expected values */
if (0 != orte_ns.compare_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) {
if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) {
BTL_ERROR(("received unexpected process identifier %s",
ORTE_NAME_PRINT(&guid)));
mca_btl_tcp_endpoint_close(btl_endpoint);

Просмотреть файл

@ -20,8 +20,8 @@
#define MCA_BTL_TCP_PROC_H
#include "opal/class/opal_object.h"
#include "orte/mca/ns/ns.h"
#include "ompi/proc/proc.h"
#include "orte/types.h"
#include "btl_tcp.h"
#include "btl_tcp_addr.h"
#include "btl_tcp_endpoint.h"

Просмотреть файл

@ -21,11 +21,10 @@
#include <sys/time.h>
#include <time.h>
#include "ompi/types.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "opal/dss/dss.h"
#include "btl_template.h"
#include "btl_template_endpoint.h"
#include "btl_template_proc.h"

Просмотреть файл

@ -19,7 +19,6 @@
#ifndef MCA_BTL_TEMPLATE_PROC_H
#define MCA_BTL_TEMPLATE_PROC_H
#include "orte/mca/ns/ns.h"
#include "opal/class/opal_object.h"
#include "ompi/proc/proc.h"
#include "btl_template.h"

Просмотреть файл

@ -226,7 +226,7 @@ do { \
if (verbose_level <= mca_btl_udapl_component.udapl_verbosity) { \
mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
orte_system_info.nodename, \
ORTE_NAME_PRINT(orte_process_info.my_name), \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_out args; \
mca_btl_base_out("\n"); \

Просмотреть файл

@ -28,14 +28,17 @@
#include "ompi/types.h"
#include "opal/include/opal/align.h"
#include "opal/util/show_help.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "opal/dss/dss.h"
#include "opal/class/opal_pointer_array.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "btl_udapl.h"
#include "btl_udapl_endpoint.h"
@ -44,14 +47,14 @@
#include "btl_udapl_proc.h"
static void mca_btl_udapl_endpoint_send_cb(int status, orte_process_name_t* endpoint,
orte_buffer_t* buffer, orte_rml_tag_t tag,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint);
static int mca_btl_udapl_endpoint_post_recv(mca_btl_udapl_endpoint_t* endpoint,
size_t size);
void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint);
void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
orte_buffer_t* buffer, orte_rml_tag_t tag,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
static int mca_btl_udapl_endpoint_finish_eager(mca_btl_udapl_endpoint_t*);
static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t*);
@ -272,7 +275,7 @@ int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint,
/*
 * Callback handed to orte_rml.send_buffer_nb() by mca_btl_udapl_start_connect.
 * Its only action is to release the buffer it is given; status, endpoint,
 * tag and cbdata are not used.
 *
 * NOTE(review): this text is a diff rendered without +/- markers. The two
 * consecutive parameter lines (orte_buffer_t vs. opal_buffer_t) look like
 * the removed and added variants of the same line -- confirm against the
 * real file.
 */
static void mca_btl_udapl_endpoint_send_cb(int status, orte_process_name_t* endpoint,
orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata)
opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata)
{
/* Drop the reference on the buffer passed to this callback. */
OBJ_RELEASE(buffer);
}
@ -467,7 +470,7 @@ int mca_btl_udapl_endpoint_create(mca_btl_udapl_module_t* btl,
static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
{
mca_btl_udapl_addr_t* addr = &endpoint->endpoint_btl->udapl_addr;
orte_buffer_t* buf = OBJ_NEW(orte_buffer_t);
opal_buffer_t* buf = OBJ_NEW(opal_buffer_t);
int rc;
if(NULL == buf) {
@ -478,13 +481,13 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), 1);
/* Pack our address information */
rc = orte_dss.pack(buf, &addr->port, 1, ORTE_UINT64);
rc = opal_dss.pack(buf, &addr->port, 1, OPAL_UINT64);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dss.pack(buf, &addr->addr, sizeof(DAT_SOCK_ADDR), ORTE_UINT8);
rc = opal_dss.pack(buf, &addr->addr, sizeof(DAT_SOCK_ADDR), OPAL_UINT8);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -492,7 +495,7 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
/* Send the buffer */
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, buf,
ORTE_RML_TAG_UDAPL, 0, mca_btl_udapl_endpoint_send_cb, NULL);
OMPI_RML_TAG_UDAPL, 0, mca_btl_udapl_endpoint_send_cb, NULL);
if(0 > rc) {
ORTE_ERROR_LOG(rc);
return rc;
@ -504,7 +507,7 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata)
opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata)
{
mca_btl_udapl_addr_t addr;
mca_btl_udapl_proc_t* proc;
@ -514,14 +517,14 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
int rc;
/* Unpack data */
rc = orte_dss.unpack(buffer, &addr.port, &cnt, ORTE_UINT64);
rc = opal_dss.unpack(buffer, &addr.port, &cnt, OPAL_UINT64);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
}
cnt = sizeof(mca_btl_udapl_addr_t);
rc = orte_dss.unpack(buffer, &addr.addr, &cnt, ORTE_UINT8);
rc = opal_dss.unpack(buffer, &addr.addr, &cnt, OPAL_UINT8);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
@ -535,7 +538,7 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
opal_list_get_end(&mca_btl_udapl_component.udapl_procs);
proc = (mca_btl_udapl_proc_t*)opal_list_get_next(proc)) {
if(ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &proc->proc_guid, endpoint)) {
if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &proc->proc_guid, endpoint)) {
for(i = 0; i < proc->proc_endpoint_count; i++) {
ep = proc->proc_endpoints[i];
@ -561,7 +564,7 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
/*
 * Register mca_btl_udapl_endpoint_recv as the RML receive callback for the
 * uDAPL tag, accepting any sender (ORTE_NAME_WILDCARD) and passing the
 * ORTE_RML_PERSISTENT flag with NULL callback data. The return value of
 * recv_buffer_nb is not checked.
 *
 * NOTE(review): this text is a diff rendered without +/- markers. The two
 * consecutive "orte_rml.recv_buffer_nb(" lines (ORTE_RML_TAG_UDAPL vs.
 * OMPI_RML_TAG_UDAPL) look like the removed and added variants of the same
 * line -- confirm against the real file.
 */
void mca_btl_udapl_endpoint_post_oob_recv(void)
{
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_UDAPL,
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, OMPI_RML_TAG_UDAPL,
ORTE_RML_PERSISTENT, mca_btl_udapl_endpoint_recv, NULL);
}
@ -577,7 +580,7 @@ void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint)
/* Nasty test to prevent deadlock and unwanted connection attempts */
/* This right here is the whole point of using the ORTE/RML handshake */
if((MCA_BTL_UDAPL_CONN_EAGER == endpoint->endpoint_state &&
0 > orte_ns.compare_fields(ORTE_NS_CMP_ALL,
0 > orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&endpoint->endpoint_proc->proc_guid,
&ompi_proc_local()->proc_name)) ||
(MCA_BTL_UDAPL_CLOSED != endpoint->endpoint_state &&
@ -715,7 +718,7 @@ static int mca_btl_udapl_endpoint_finish_eager(
}
/* Only one side does dat_ep_connect() */
if(0 < orte_ns.compare_fields(ORTE_NS_CMP_ALL,
if(0 < orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&endpoint->endpoint_proc->proc_guid,
&ompi_proc_local()->proc_name)) {

Просмотреть файл

@ -20,7 +20,6 @@
#ifndef MCA_BTL_UDAPL_PROC_H
#define MCA_BTL_UDAPL_PROC_H
#include "orte/mca/ns/ns.h"
#include "opal/class/opal_object.h"
#include "ompi/proc/proc.h"
#include "btl_udapl.h"

Просмотреть файл

@ -24,7 +24,7 @@
#include "mpi.h"
#include "opal/mca/mca.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/types.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/common/sm/common_sm_mmap.h"

Просмотреть файл

@ -43,7 +43,10 @@
#include "opal/mca/maffinity/maffinity.h"
#include "opal/mca/maffinity/base/base.h"
#include "opal/util/os_path.h"
#include "orte/mca/ns/ns.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
@ -598,7 +601,7 @@ static int bootstrap_comm(ompi_communicator_t *comm,
empty_index = -1;
for (i = 0; i < mca_coll_sm_component.sm_bootstrap_num_segments; ++i) {
if (comm->c_contextid == bshe->smbhe_keys[i].mcsbck_cid &&
ORTE_EQUAL == orte_ns.compare_fields(ORTE_NS_CMP_ALL,
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
rank0,
&bshe->smbhe_keys[i].mcsbck_rank0_name)) {
found = true;

Просмотреть файл

@ -17,7 +17,7 @@
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "common_portals.h"

Просмотреть файл

@ -51,6 +51,8 @@
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rml/base/base.h"
#include "ompi/mca/dpm/dpm.h"
OBJ_CLASS_INSTANCE(
mca_common_sm_mmap_t,
opal_object_t,
@ -179,13 +181,13 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name,
/* signal the rest of the local procs that the backing file
has been created */
for(p=1 ; p < n_local_procs ; p++ ) {
sm_file_created=ORTE_RML_TAG_SM_BACK_FILE_CREATED;
sm_file_created=OMPI_RML_TAG_SM_BACK_FILE_CREATED;
iov[0].iov_base=&sm_file_created;
iov[0].iov_len=sizeof(sm_file_created);
iov[1].iov_base=&sm_file_inited;
iov[1].iov_len=sizeof(sm_file_inited);
rc=orte_rml.send(&(procs[p]->proc_name),iov,2,
ORTE_RML_TAG_SM_BACK_FILE_CREATED,0);
OMPI_RML_TAG_SM_BACK_FILE_CREATED,0);
if( rc < 0 ) {
opal_output(0,
"mca_common_sm_mmap_init: orte_rml.send failed to %lu with errno=%d\n",
@ -205,7 +207,7 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name,
iov[1].iov_base=&sm_file_inited;
iov[1].iov_len=sizeof(sm_file_inited);
rc=orte_rml.recv(&(procs[0]->proc_name),iov,2,
ORTE_RML_TAG_SM_BACK_FILE_CREATED,0);
OMPI_RML_TAG_SM_BACK_FILE_CREATED,0);
if( rc < 0 ) {
opal_output(0, "mca_common_sm_mmap_init: orte_rml.recv failed from %ld with errno=%d\n",
0L, errno);

Просмотреть файл

@ -22,7 +22,7 @@
#include "ompi/constants.h"
#include "orte/mca/rml/rml.h"
#include "orte/dss/dss.h"
#include "opal/dss/dss.h"
#include "ompi/mca/crcp/crcp.h"

Просмотреть файл

@ -31,9 +31,6 @@
#include "opal/util/output.h"
#include "opal/util/os_dirpath.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/gpr/gpr.h"
#include "ompi/communicator/communicator.h"
#include "ompi/proc/proc.h"
#include "opal/mca/base/mca_base_param.h"

Просмотреть файл

@ -29,7 +29,6 @@
#include "opal/mca/mca.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/communicator/communicator.h"
#include "orte/mca/ns/ns.h"
#include "opal/runtime/opal_cr.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"

Просмотреть файл

@ -29,7 +29,6 @@
#include "opal/mca/mca.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/communicator/communicator.h"
#include "orte/mca/ns/ns.h"
#include "opal/runtime/opal_cr.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"

Просмотреть файл

@ -171,8 +171,11 @@
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "ompi/request/request.h"
#include "ompi/datatype/dt_arch.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/pml/base/pml_base_request.h"
@ -279,7 +282,7 @@ static int recv_bookmarks(int peer_idx);
*/
static void recv_bookmarks_cbfunc(int status,
orte_process_name_t* sender,
orte_buffer_t *buffer,
opal_buffer_t *buffer,
orte_rml_tag_t tag,
void* cbdata);
static int total_recv_bookmarks = 0;
@ -367,7 +370,7 @@ static int ft_event_post_drain_acks(void);
*/
static void drain_message_ack_cbfunc(int status,
orte_process_name_t* sender,
orte_buffer_t *buffer,
opal_buffer_t *buffer,
orte_rml_tag_t tag,
void* cbdata);
@ -766,7 +769,7 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_state_t,
#define PACK_BUFFER(buffer, var, count, type, error_msg) \
{ \
if (OMPI_SUCCESS != (ret = orte_dss.pack(buffer, &(var), count, type)) ) { \
if (OMPI_SUCCESS != (ret = opal_dss.pack(buffer, &(var), count, type)) ) { \
opal_output(mca_crcp_coord_component.super.output_handle, \
"%s (Return %d)", error_msg, ret); \
exit_status = ret; \
@ -777,7 +780,7 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_state_t,
#define UNPACK_BUFFER(buffer, var, count, type, error_msg) \
{ \
orte_std_cntr_t n = count; \
if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &(var), &n, type)) ) { \
if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(var), &n, type)) ) { \
opal_output(mca_crcp_coord_component.super.output_handle, \
"%s (Return %d)", error_msg, ret); \
exit_status = ret; \
@ -2703,6 +2706,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_ft_event(
ompi_crcp_base_pml_state_t* pml_state)
{
static int step_to_return_to = 0;
opal_list_item_t* item = NULL;
int exit_status = OMPI_SUCCESS;
int ret;
@ -2782,6 +2786,19 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_ft_event(
goto DONE;
}
/*
* Refresh the jobids
*/
for(item = opal_list_get_first(&ompi_crcp_coord_pml_peer_refs);
item != opal_list_get_end(&ompi_crcp_coord_pml_peer_refs);
item = opal_list_get_next(item) ) {
ompi_crcp_coord_pml_peer_ref_t *cur_peer_ref;
cur_peer_ref = (ompi_crcp_coord_pml_peer_ref_t*)item;
/* JJH - Assuming only one global jobid at the moment */
cur_peer_ref->proc_name.jobid = ORTE_PROC_MY_NAME->jobid;
}
/*
* Finish the coord protocol
*/
@ -2832,9 +2849,9 @@ static ompi_crcp_coord_pml_peer_ref_t * find_peer(orte_process_name_t proc)
ompi_crcp_coord_pml_peer_ref_t *cur_peer_ref;
cur_peer_ref = (ompi_crcp_coord_pml_peer_ref_t*)item;
if( 0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL,
&(cur_peer_ref->proc_name),
&proc) ) {
if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&(cur_peer_ref->proc_name),
&proc) ) {
return cur_peer_ref;
}
}
@ -2993,7 +3010,7 @@ static int ft_event_coordinate_peers(void)
if( stall_for_completion ) {
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: ft_event_coordinate_peers: %s **** STALLING ***",
ORTE_NAME_PRINT(orte_process_info.my_name)));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
step_to_return_to = 1;
exit_status = OMPI_SUCCESS;
goto DONE;
@ -3019,7 +3036,7 @@ static int ft_event_coordinate_peers(void)
OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle,
"crcp:coord: ft_event_coordinate_peers: %s Coordination Finished...\n",
ORTE_NAME_PRINT(orte_process_info.my_name) ));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/*
* Now that all our peer channels are marked as drained
@ -3099,7 +3116,7 @@ static int ft_event_finalize_exchange(void)
static int ft_event_exchange_bookmarks(void)
{
int peer_idx = 0;
int my_idx = orte_process_info.my_name->vpid;
int my_idx = ORTE_PROC_MY_NAME->vpid;
int iter = 0;
int num_peers = 0;
@ -3144,13 +3161,13 @@ static int ft_event_check_bookmarks(void)
int p_n_from_p_m = 0;
if( 10 <= mca_crcp_coord_component.super.verbose ) {
sleep(orte_process_info.my_name->vpid);
sleep(ORTE_PROC_MY_NAME->vpid);
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"Process %s Match Table",
ORTE_NAME_PRINT(orte_process_info.my_name)));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"%s %5s | %7s | %7s | %7s | %7s |",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
"Vpid", "T_Send", "M_Recv", "M_Send", "T_Recv"));
for(item = opal_list_get_first(&ompi_crcp_coord_pml_peer_refs);
@ -3176,7 +3193,7 @@ static int ft_event_check_bookmarks(void)
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"%s %5d | %7d | %7d | %7d | %7d |",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
peer_ref->proc_name.vpid,
t_send, m_recv, m_send, t_recv));
}
@ -3193,14 +3210,14 @@ static int ft_event_check_bookmarks(void)
ompi_crcp_coord_pml_peer_ref_t *peer_ref;
peer_ref = (ompi_crcp_coord_pml_peer_ref_t*)item;
if( 0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL,
(orte_process_info.my_name),
&(peer_ref->proc_name)) ) {
if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
(ORTE_PROC_MY_NAME),
&(peer_ref->proc_name)) ) {
continue;
}
/* Lowest Rank sends first */
if( orte_process_info.my_name->vpid < peer_ref->proc_name.vpid ) {
if( ORTE_PROC_MY_NAME->vpid < peer_ref->proc_name.vpid ) {
/********************
* Check P_n --> P_m
* Has the peer received all the messages that I have put on the wire?
@ -3218,7 +3235,7 @@ static int ft_event_check_bookmarks(void)
"crcp:coord: check_bookmarks: %s --> %s "
"Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). "
" WARNING: Peer received more than was sent. :(\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
p_n_to_p_m,
p_n_from_p_m,
@ -3232,7 +3249,7 @@ static int ft_event_check_bookmarks(void)
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: check_bookmarks: %s --> %s "
"Sent Msgs (%4d) = Received Msgs (%4d). Peer needs %4d.\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
p_n_to_p_m,
p_n_from_p_m,
@ -3269,7 +3286,7 @@ static int ft_event_check_bookmarks(void)
"crcp:coord: check_bookmarks: %s --> %s "
"Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). "
" WARNING: I received more than the peer sent. :(\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
p_n_to_p_m,
p_n_from_p_m,
@ -3283,7 +3300,7 @@ static int ft_event_check_bookmarks(void)
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: check_bookmarks: %s <-- %s "
"Received Msgs (%4d) = Sent Msgs (%4d). I need %4d.\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
p_n_to_p_m,
p_n_from_p_m,
@ -3321,7 +3338,7 @@ static int ft_event_check_bookmarks(void)
"crcp:coord: check_bookmarks: %s --> %s "
"Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). "
" WARNING: I received more than the peer sent. :(\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
p_n_to_p_m,
p_n_from_p_m,
@ -3335,7 +3352,7 @@ static int ft_event_check_bookmarks(void)
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: check_bookmarks: %s <-- %s "
"Received Msgs (%4d) = Sent Msgs (%4d). I need %4d.\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
p_n_to_p_m,
p_n_from_p_m,
@ -3371,7 +3388,7 @@ static int ft_event_check_bookmarks(void)
"crcp:coord: check_bookmarks: %s --> %s "
"Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). "
" WARNING: Peer received more than was sent. :(\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
p_n_to_p_m,
p_n_from_p_m,
@ -3385,7 +3402,7 @@ static int ft_event_check_bookmarks(void)
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: check_bookmarks: %s --> %s "
"Sent Msgs (%4d) = Received Msgs (%4d). Peer needs %4d.\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
p_n_to_p_m,
p_n_from_p_m,
@ -3433,7 +3450,7 @@ static int ft_event_post_drain_acks(void)
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: post_drain_ack: %s Wait on %d Drain ACK Messages.\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)req_size));
/*
@ -3453,7 +3470,7 @@ static int ft_event_post_drain_acks(void)
NULL) ) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: post_drain_acks: %s Failed to post a RML receive to the peer\n",
ORTE_NAME_PRINT(orte_process_info.my_name));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
return ret;
}
}
@ -3463,7 +3480,7 @@ static int ft_event_post_drain_acks(void)
static void drain_message_ack_cbfunc(int status,
orte_process_name_t* sender,
orte_buffer_t *buffer,
opal_buffer_t *buffer,
orte_rml_tag_t tag,
void* cbdata)
{
@ -3474,7 +3491,7 @@ static void drain_message_ack_cbfunc(int status,
/*
* Unpack the buffer
*/
UNPACK_BUFFER(buffer, ckpt_status, 1, ORTE_SIZE, "");
UNPACK_BUFFER(buffer, ckpt_status, 1, OPAL_SIZE, "");
/*
* Update the outstanding message queue
@ -3488,13 +3505,14 @@ static void drain_message_ack_cbfunc(int status,
/* If this ACK has not completed yet */
if(!drain_msg_ack->complete) {
/* If it is the correct peer */
if(drain_msg_ack->peer.jobid == sender->jobid &&
drain_msg_ack->peer.vpid == sender->vpid ) {
if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&(drain_msg_ack->peer),
sender) ) {
/* We found it! */
drain_msg_ack->complete = true;
OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle,
"crcp:coord: drain_message_ack_cbfunc: %s --> %s Received ACK of FLUSH from peer\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender) ));
return;
}
@ -3503,7 +3521,7 @@ static void drain_message_ack_cbfunc(int status,
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: drain_message_ack_cbfunc: %s --> %s ERROR: Uable to match ACK to peer\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender) );
cleanup:
@ -3523,7 +3541,7 @@ static int ft_event_post_drained(void)
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: post_drained: %s Draining %d Messages.\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)req_size));
/*
@ -3544,7 +3562,7 @@ static int ft_event_post_drained(void)
if( drain_msg->already_posted ) {
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: post_drained: %s Found a message that we don't need to post.\n",
ORTE_NAME_PRINT(orte_process_info.my_name)));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
continue;
}
/*
@ -3553,7 +3571,7 @@ static int ft_event_post_drained(void)
else {
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: post_drained: %s Posting a message to be drained from %d.\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
drain_msg->rank));
if( OMPI_SUCCESS != (ret = wrapped_pml_module->pml_irecv(drain_msg->buffer,
(drain_msg->count * drain_msg->ddt_size),
@ -3564,7 +3582,7 @@ static int ft_event_post_drained(void)
&(drain_msg->request) ) ) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: post_drained: %s Failed to post the Draining PML iRecv\n",
ORTE_NAME_PRINT(orte_process_info.my_name) );
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
return ret;
}
}
@ -3584,7 +3602,7 @@ static int ft_event_wait_quiesce(void)
if( OMPI_SUCCESS != (ret = wait_quiesce_drained() ) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: wait_quiesce: %s Failed to quiesce drained messages\n",
ORTE_NAME_PRINT(orte_process_info.my_name) );
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
exit_status = ret;
goto cleanup;
}
@ -3595,7 +3613,7 @@ static int ft_event_wait_quiesce(void)
if( OMPI_SUCCESS != (ret = wait_quiesce_drain_ack() ) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: wait_quiesce: %s Failed to recv all drain ACKs\n",
ORTE_NAME_PRINT(orte_process_info.my_name) );
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
exit_status = ret;
goto cleanup;
}
@ -3628,7 +3646,7 @@ static int wait_quiesce_drained(void)
OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle,
"crcp:coord: wait_quiesce_drained: %s Waiting on %d messages to drain\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)req_size));
/*
@ -3683,13 +3701,13 @@ static int wait_quiesce_drained(void)
if( drain_msg->already_posted && NULL == drain_msg->request) {
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: wait_quiesce_drained: %s - %s Already posted this msg.\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(drain_msg->proc_name)) ));
}
else {
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: wait_quiesce_drained: %s - %s Waiting on message. (index = %d)\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(drain_msg->proc_name)),
(int)wait_any_count));
@ -3704,8 +3722,9 @@ static int wait_quiesce_drained(void)
/* Add proc to response queue if it is not already there */
found = false;
for(i = 0; i < last_proc_idx; ++i) {
if(proc_names[i].jobid == drain_msg->proc_name.jobid &&
proc_names[i].vpid == drain_msg->proc_name.vpid ) {
if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&(proc_names[i]),
&(drain_msg->proc_name) ) ) {
found = true;
break;
}
@ -3713,7 +3732,7 @@ static int wait_quiesce_drained(void)
if( !found ) {
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: wait_quiesce: %s - %s Add process to response list [idx %d]\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(drain_msg->proc_name)),
(int)last_proc_idx));
@ -3741,19 +3760,19 @@ static int wait_quiesce_drained(void)
*/
OPAL_OUTPUT_VERBOSE((5, mca_crcp_coord_component.super.output_handle,
"crcp:coord: wait_quiesce: %s Send ACKs to all Peers\n",
ORTE_NAME_PRINT(orte_process_info.my_name)));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
for(i = 0; i < last_proc_idx; ++i) {
orte_buffer_t *buffer = NULL;
opal_buffer_t *buffer = NULL;
size_t response = 1;
/* Send All Clear to Peer */
if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) {
if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
exit_status = OMPI_ERROR;
goto cleanup;
}
PACK_BUFFER(buffer, response, 1, ORTE_SIZE, "");
PACK_BUFFER(buffer, response, 1, OPAL_SIZE, "");
if ( 0 > ( ret = orte_rml.send_buffer(&(proc_names[i]), buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
exit_status = ret;
@ -3821,7 +3840,7 @@ static int coord_request_wait_all( size_t count,
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: request_wait_all: %s Done with idx %d of %d\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)i, (int)count));
}
@ -3858,7 +3877,7 @@ static int wait_quiesce_drain_ack(void)
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: wait_quiesce_drain_ack: %s Waiting on %d Drain ACK messages\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
num_outstanding));
while(0 < num_outstanding) {
@ -3892,7 +3911,7 @@ static int send_bookmarks(int peer_idx)
{
ompi_crcp_coord_pml_peer_ref_t *peer_ref;
orte_process_name_t peer_name;
orte_buffer_t *buffer = NULL;
opal_buffer_t *buffer = NULL;
int exit_status = OMPI_SUCCESS;
int ret;
@ -3900,7 +3919,7 @@ static int send_bookmarks(int peer_idx)
/*
* Find the peer structure for this peer
*/
peer_name.jobid = orte_process_info.my_name->jobid;
peer_name.jobid = ORTE_PROC_MY_NAME->jobid;
peer_name.vpid = peer_idx;
if( NULL == (peer_ref = find_peer(peer_name))) {
@ -3913,7 +3932,7 @@ static int send_bookmarks(int peer_idx)
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: send_bookmarks: %s -> %s Sending bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&peer_name),
peer_ref->total_send_msgs,
peer_ref->total_isend_msgs,
@ -3925,23 +3944,23 @@ static int send_bookmarks(int peer_idx)
/*
* Send the bookmarks to peer
*/
if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) {
if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
exit_status = OMPI_ERROR;
goto cleanup;
}
PACK_BUFFER(buffer, (peer_ref->total_send_msgs), 1, ORTE_UINT32,
PACK_BUFFER(buffer, (peer_ref->total_send_msgs), 1, OPAL_UINT32,
"crcp:coord: send_bookmarks: Unable to pack total_send_msgs");
PACK_BUFFER(buffer, (peer_ref->total_isend_msgs), 1, ORTE_UINT32,
PACK_BUFFER(buffer, (peer_ref->total_isend_msgs), 1, OPAL_UINT32,
"crcp:coord: send_bookmarks: Unable to pack total_isend_msgs");
PACK_BUFFER(buffer, (peer_ref->total_send_init_msgs), 1, ORTE_UINT32,
PACK_BUFFER(buffer, (peer_ref->total_send_init_msgs), 1, OPAL_UINT32,
"crcp:coord: send_bookmarks: Unable to pack total_send_init_msgs");
PACK_BUFFER(buffer, (peer_ref->total_recv_msgs), 1, ORTE_UINT32,
PACK_BUFFER(buffer, (peer_ref->total_recv_msgs), 1, OPAL_UINT32,
"crcp:coord: send_bookmarks: Unable to pack total_recv_msgs");
PACK_BUFFER(buffer, (peer_ref->total_irecv_msgs), 1, ORTE_UINT32,
PACK_BUFFER(buffer, (peer_ref->total_irecv_msgs), 1, OPAL_UINT32,
"crcp:coord: send_bookmarks: Unable to pack total_irecv_msgs");
PACK_BUFFER(buffer, (peer_ref->total_recv_init_msgs), 1, ORTE_UINT32,
PACK_BUFFER(buffer, (peer_ref->total_recv_init_msgs), 1, OPAL_UINT32,
"crcp:coord: send_bookmarks: Unable to pack total_recv_init_msgs");
if ( 0 > ( ret = orte_rml.send_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
@ -3975,7 +3994,7 @@ static int recv_bookmarks(int peer_idx)
START_TIMER(CRCP_TIMER_CKPT_PEER_R);
peer_name.jobid = orte_process_info.my_name->jobid;
peer_name.jobid = ORTE_PROC_MY_NAME->jobid;
peer_name.vpid = peer_idx;
if ( 0 > (ret = orte_rml.recv_buffer_nb(&peer_name,
@ -4005,7 +4024,7 @@ static int recv_bookmarks(int peer_idx)
{
ompi_crcp_coord_pml_peer_ref_t *peer_ref;
orte_process_name_t peer_name;
orte_buffer_t * buffer = NULL;
opal_buffer_t * buffer = NULL;
int exit_status = OMPI_SUCCESS;
int ret, tmp_int;
@ -4014,7 +4033,7 @@ static int recv_bookmarks(int peer_idx)
/*
* Find the peer structure for this peer
*/
peer_name.jobid = orte_process_info.my_name->jobid;
peer_name.jobid = ORTE_PROC_MY_NAME->jobid;
peer_name.vpid = peer_idx;
if( NULL == (peer_ref = find_peer(peer_name))) {
@ -4028,7 +4047,7 @@ static int recv_bookmarks(int peer_idx)
/*
* Receive the bookmark from peer
*/
if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) {
if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
exit_status = ORTE_ERROR;
goto cleanup;
}
@ -4042,29 +4061,29 @@ static int recv_bookmarks(int peer_idx)
goto cleanup;
}
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_send_msgs");
peer_ref->matched_send_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_isend_msgs");
peer_ref->matched_isend_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_send_init_msgs");
peer_ref->matched_send_init_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_recv_msgs");
peer_ref->matched_recv_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_irecv_msgs");
peer_ref->matched_irecv_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_recv_init_msgs");
peer_ref->matched_recv_init_msgs = tmp_int;
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_bookmarks: %s <- %s Received bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&peer_name),
peer_ref->matched_send_msgs,
peer_ref->matched_isend_msgs,
@ -4087,7 +4106,7 @@ static int recv_bookmarks(int peer_idx)
static void recv_bookmarks_cbfunc(int status,
orte_process_name_t* sender,
orte_buffer_t *buffer,
opal_buffer_t *buffer,
orte_rml_tag_t tag,
void* cbdata)
{
@ -4111,29 +4130,29 @@ static void recv_bookmarks_cbfunc(int status,
goto cleanup;
}
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_send_msgs");
peer_ref->matched_send_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_isend_msgs");
peer_ref->matched_isend_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_send_init_msgs");
peer_ref->matched_send_init_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_recv_msgs");
peer_ref->matched_recv_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_irecv_msgs");
peer_ref->matched_irecv_msgs = tmp_int;
UNPACK_BUFFER(buffer, tmp_int, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32,
"crcp:coord: recv_bookmarks: Unable to unpack total_recv_init_msgs");
peer_ref->matched_recv_init_msgs = tmp_int;
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_bookmarks: %s <- %s Received bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender),
peer_ref->matched_send_msgs,
peer_ref->matched_isend_msgs,
@ -4189,7 +4208,7 @@ static int send_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
if(OMPI_SUCCESS != (ret = do_send_msg_detail(peer_ref, msg_ref, &found_match, &finished)) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: send_msg_details: %s --> %s Failed to send message details to peer. Return %d\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
ret);
}
@ -4240,7 +4259,7 @@ static int send_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
opal_list_append(&drained_msg_ack_list, &(d_msg_ack->super));
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: send_msg_details: %s <--> %s Will wait on ACK from this peer.\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name))));
/*
@ -4261,7 +4280,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
bool *found_match,
bool *finished)
{
orte_buffer_t *buffer = NULL;
opal_buffer_t *buffer = NULL;
int32_t req_more = -1;
int comm_my_rank = -1;
int exit_status = OMPI_SUCCESS;
@ -4275,7 +4294,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
buffer = NULL;
}
if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) {
if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
exit_status = OMPI_ERROR;
goto cleanup;
}
@ -4287,9 +4306,9 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
*/
comm_my_rank = ompi_comm_rank(msg_ref->comm);
PACK_BUFFER(buffer, msg_ref->comm->c_contextid, 1, ORTE_UINT32,
PACK_BUFFER(buffer, msg_ref->comm->c_contextid, 1, OPAL_UINT32,
"crcp:coord: send_msg_details: Unable to pack communicator ID");
PACK_BUFFER(buffer, comm_my_rank, 1, ORTE_INT,
PACK_BUFFER(buffer, comm_my_rank, 1, OPAL_INT,
"crcp:coord: send_msg_details: Unable to pack comm rank ID");
/*
@ -4298,11 +4317,11 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
* - Message count
* - Message Datatype size
*/
PACK_BUFFER(buffer, msg_ref->tag, 1, ORTE_INT,
PACK_BUFFER(buffer, msg_ref->tag, 1, OPAL_INT,
"crcp:coord: send_msg_details: Unable to pack tag");
PACK_BUFFER(buffer, msg_ref->count, 1, ORTE_SIZE,
PACK_BUFFER(buffer, msg_ref->count, 1, OPAL_SIZE,
"crcp:coord: send_msg_details: Unable to pack count");
PACK_BUFFER(buffer, msg_ref->ddt_size, 1, ORTE_SIZE,
PACK_BUFFER(buffer, msg_ref->ddt_size, 1, OPAL_SIZE,
"crcp:coord: send_msg_details: Unable to pack datatype size");
/*
@ -4327,7 +4346,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
/*
* Check return value from peer to see if we found a match.
*/
if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) {
if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
exit_status = ORTE_ERROR;
goto cleanup;
}
@ -4339,14 +4358,14 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: do_send_msg_detail: %s --> %s Failed to receive ACK buffer from peer. Return %d\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
ret);
exit_status = ret;
goto cleanup;
}
UNPACK_BUFFER(buffer, req_more, 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, req_more, 1, OPAL_UINT32,
"crcp:coord: send_msg_details: Failed to unpack the ACK from peer buffer.");
/* Mark message as matched */
@ -4411,7 +4430,7 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_details: %s <-- %s "
"Failed to receive message detail from peer. Return %d\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
ret);
exit_status = ret;
@ -4431,7 +4450,7 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_details: %s <-- %s "
"Failed to check message detail from peer. Return %d\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
ret);
exit_status = ret;
@ -4458,7 +4477,7 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
if(OMPI_SUCCESS != (ret = do_recv_msg_detail_resp(peer_ref, response))) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_details: %s <-- %s Failed to respond to peer. Return %d\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
ret);
exit_status = ret;
@ -4475,11 +4494,11 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
int *rank, uint32_t *comm_id, int *tag,
size_t *count, size_t *datatype_size)
{
orte_buffer_t * buffer = NULL;
opal_buffer_t * buffer = NULL;
int exit_status = OMPI_SUCCESS;
int ret;
if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) {
if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
exit_status = ORTE_ERROR;
goto cleanup;
}
@ -4490,7 +4509,7 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: do_recv_msg_detail: %s <-- %s Failed to receive buffer from peer. Return %d\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
ret);
exit_status = ret;
@ -4498,17 +4517,17 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
}
/* Pull out the communicator ID */
UNPACK_BUFFER(buffer, (*comm_id), 1, ORTE_UINT32,
UNPACK_BUFFER(buffer, (*comm_id), 1, OPAL_UINT32,
"crcp:coord: recv_msg_details: Failed to unpack the communicator ID");
UNPACK_BUFFER(buffer, (*rank), 1, ORTE_INT,
UNPACK_BUFFER(buffer, (*rank), 1, OPAL_INT,
"crcp:coord: recv_msg_details: Failed to unpack the communicator rank ID");
/* Pull out the message details */
UNPACK_BUFFER(buffer, (*tag), 1, ORTE_INT,
UNPACK_BUFFER(buffer, (*tag), 1, OPAL_INT,
"crcp:coord: recv_msg_details: Failed to unpack the tag");
UNPACK_BUFFER(buffer, (*count), 1, ORTE_SIZE,
UNPACK_BUFFER(buffer, (*count), 1, OPAL_SIZE,
"crcp:coord: recv_msg_details: Failed to unpack the count");
UNPACK_BUFFER(buffer, (*datatype_size), 1, ORTE_SIZE,
UNPACK_BUFFER(buffer, (*datatype_size), 1, OPAL_SIZE,
"crcp:coord: recv_msg_details: Failed to unpack the datatype size");
cleanup:
@ -4552,7 +4571,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_detail_check: %s -- %s "
"Failed to determine if we have received this message. Return %d\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
ret);
exit_status = ret;
@ -4562,7 +4581,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
OPAL_OUTPUT_VERBOSE((20, mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_detail_check: %s -- %s"
" found %s, complete %s, posted %s, peer_rank=[%d vs %d]\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
(true == msg_found ? "True " : "False"),
(true == msg_complete ? "True " : "False"),
@ -4580,7 +4599,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
OPAL_OUTPUT_VERBOSE((15, mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_detail_check: %s Found a message that needs to be drained\n",
ORTE_NAME_PRINT(orte_process_info.my_name) ));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/*
* Construct a message for draining
@ -4639,7 +4658,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_detail_check: %s "
"Found a message already posted! Prepare to drain.\n",
ORTE_NAME_PRINT(orte_process_info.my_name)));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/*
* If this is the current blocking recv,
@ -4650,7 +4669,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_detail_check: %s "
"Found a message already posted! Prepare to STALL.\n",
ORTE_NAME_PRINT(orte_process_info.my_name)));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
stall_for_completion = true;
}
/*
@ -4661,7 +4680,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
OPAL_OUTPUT_VERBOSE((10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_detail_check: %s "
"Found a message already posted! No stall required [%3d, %3d, %3d, %3d].\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)current_msg_id,
(int)current_msg_type,
(int)posted_msg_ref->msg_id,
@ -4679,7 +4698,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
* messages.
* JJH -- When do we use this?
*/
if (posted_msg_ref->rank != peer_ref->proc_name.vpid) {
if (posted_msg_ref->rank != (int)peer_ref->proc_name.vpid) {
posted_msg_ref->suggested_rank = rank;
}
@ -4709,7 +4728,7 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
else {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_msg_detail_check: ***** ERROR ***** %s Failed to find an action to use. This should never happen!\n",
ORTE_NAME_PRINT(orte_process_info.my_name));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
exit_status = OMPI_ERROR;
goto cleanup;
}
@ -4989,16 +5008,16 @@ static int find_message_named(opal_list_t * search_list,
static int do_recv_msg_detail_resp(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
int resp)
{
orte_buffer_t * buffer = NULL;
opal_buffer_t * buffer = NULL;
int exit_status = OMPI_SUCCESS;
int ret;
if (NULL == (buffer = OBJ_NEW(orte_buffer_t))) {
if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
exit_status = OMPI_ERROR;
goto cleanup;
}
PACK_BUFFER(buffer, resp, 1, ORTE_UINT32,
PACK_BUFFER(buffer, resp, 1, OPAL_UINT32,
"crcp:coord: recv_msg_details: Unable to ask peer for more messages");
if ( 0 > ( ret = orte_rml.send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
@ -5066,7 +5085,7 @@ static void display_all_timers(int state) {
static void display_indv_timer(int idx, int var) {
double diff = timer_end[idx] - timer_start[idx];
if( 0 != orte_process_info.my_name->vpid ) {
if( 0 != ORTE_PROC_MY_NAME->vpid ) {
return;
}

Просмотреть файл

@ -29,7 +29,6 @@
#include "opal/mca/mca.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/communicator/communicator.h"
#include "orte/mca/ns/ns.h"
#include "opal/runtime/opal_cr.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"

Просмотреть файл

@ -1,5 +1,5 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
@ -17,22 +17,22 @@
#
# main library setup
noinst_LTLIBRARIES = libmca_ns.la
libmca_ns_la_SOURCES =
noinst_LTLIBRARIES = libmca_dpm.la
libmca_dpm_la_SOURCES =
# header setup
nobase_orte_HEADERS =
nobase_ompi_HEADERS =
# local files
headers = ns.h ns_types.h
libmca_ns_la_SOURCES += $(headers)
headers = dpm.h
libmca_dpm_la_SOURCES += $(headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
nobase_orte_HEADERS += $(headers)
ortedir = $(includedir)/openmpi/orte/mca/ns
nobase_ompi_HEADERS += $(headers)
ompidir = $(includedir)/openmpi/ompi/mca/dpm
else
ortedir = $(includedir)
ompidir = $(includedir)
endif
include base/Makefile.am

29
ompi/mca/dpm/base/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,29 @@
#
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Help-message text for the DPM base; the dist_ prefix ships it in the
# tarball and the pkgdata_DATA primary installs it under $(pkgdatadir).
dist_pkgdata_DATA = base/help-ompi-dpm-base.txt
# This fragment is pulled in via "include base/Makefile.am" by the framework's
# Makefile.am, so it appends to variables defined there and spells every path
# relative to that including directory (hence the base/ prefix).
headers += \
base/base.h
# Sources of the framework base, compiled into libmca_dpm.la
libmca_dpm_la_SOURCES += \
base/dpm_base_open.c \
base/dpm_base_close.c \
base/dpm_base_select.c \
base/dpm_base_common_fns.c

86
ompi/mca/dpm/base/base.h Обычный файл
Просмотреть файл

@ -0,0 +1,86 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_MCA_DPM_BASE_H
#define OMPI_MCA_DPM_BASE_H
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/mca/dpm/dpm.h"
/*
* Global functions for MCA overall DPM
*/
BEGIN_C_DECLS
/**
 * Bookkeeping for one pending communicator disconnect. Instances are
 * created by ompi_dpm_base_disconnect_init() and released by
 * ompi_dpm_base_disconnect_waitall().
 */
struct ompi_dpm_base_disconnect_obj {
/* communicator the disconnect handshake was started on */
ompi_communicator_t *comm;
/* number of peers: the remote size for an inter-communicator,
   otherwise the communicator size */
int size;
/* array of 2*size requests: entry 2*i is the receive from peer i,
   entry 2*i+1 the matching send */
struct ompi_request_t **reqs;
/* dummy buffer handed to the zero-length sends and receives */
int buf;
};
typedef struct ompi_dpm_base_disconnect_obj ompi_dpm_base_disconnect_obj;
/**
* Initialize the DPM MCA framework
*
* @retval OMPI_SUCCESS Upon success
* @retval OMPI_ERROR Upon failures
*
* This function is invoked during ompi_init();
*/
OMPI_DECLSPEC int ompi_dpm_base_open(void);
/**
* Select an available component.
*
* @retval OMPI_SUCCESS Upon Success
* @retval OMPI_NOT_FOUND If no component can be selected
* @retval OMPI_ERROR Upon other failure
*
*/
OMPI_DECLSPEC int ompi_dpm_base_select(void);
/**
* Finalize the DPM MCA framework
*
* @retval OMPI_SUCCESS Upon success
* @retval OMPI_ERROR Upon failures
*
* This function is invoked during ompi_finalize();
*/
OMPI_DECLSPEC int ompi_dpm_base_close(void);
/* Internal support functions */

/**
 * Return the value of the OMPI_PARENT_PORT environment variable, or NULL
 * if it is not set. The string is obtained from getenv() and must not be
 * freed by the caller.
 */
OMPI_DECLSPEC char* ompi_dpm_base_dyn_init (void);

/**
 * Walk the communicator table and run the disconnect handshake for the
 * communicators flagged as dynamic.
 *
 * @retval OMPI_SUCCESS             Nothing to do, or the handshakes were waited for
 * @retval OMPI_ERR_OUT_OF_RESOURCE The handle array could not be allocated
 * @retval OMPI_ERROR               The number of dynamic communicators found
 *                                  does not match the expected count
 */
OMPI_DECLSPEC int ompi_dpm_base_dyn_finalize (void);

/**
 * Flag @p comm as dynamic and increment ompi_comm_num_dyncomm when the
 * processes of its local and remote groups carry more than one jobid.
 * MPI_COMM_NULL is ignored.
 */
OMPI_DECLSPEC void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm);

/**
 * Start the disconnect handshake on @p comm: post a zero-length receive
 * and a zero-length synchronous send for every peer.
 *
 * @return A handle for ompi_dpm_base_disconnect_waitall(), or NULL if an
 *         allocation or one of the posts fails.
 */
OMPI_DECLSPEC ompi_dpm_base_disconnect_obj *ompi_dpm_base_disconnect_init ( ompi_communicator_t *comm);

/**
 * Block until all requests of the @p count handles in @p objs have
 * completed, free the handles, and decrease ompi_comm_num_dyncomm by
 * @p count. The @p objs array itself remains owned by the caller.
 */
OMPI_DECLSPEC void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs);
/* useful globals */
/* passed as the first argument to mca_base_components_close() when the
   framework is closed; presumably the framework's output stream id */
OMPI_DECLSPEC extern int ompi_dpm_base_output;
/* list of components that ompi_dpm_base_close() closes */
OMPI_DECLSPEC extern opal_list_t ompi_dpm_base_components_available;
/* presumably the component picked by ompi_dpm_base_select() -- TODO confirm */
OMPI_DECLSPEC extern ompi_dpm_base_component_t ompi_dpm_base_selected_component;
/* active DPM module; its optional finalize entry is called by
   ompi_dpm_base_close() */
OMPI_DECLSPEC extern ompi_dpm_base_module_t ompi_dpm;
END_C_DECLS
#endif /* OMPI_MCA_DPM_BASE_H */

41
ompi/mca/dpm/base/dpm_base_close.c Обычный файл
Просмотреть файл

@ -0,0 +1,41 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/mca/dpm/base/base.h"
/*
 * Shut down the DPM framework: let the active module finalize itself,
 * then close every component still held on the framework's list.
 * Always returns OMPI_SUCCESS.
 */
int ompi_dpm_base_close(void)
{
    /* The finalize entry point of the active module is optional */
    if( ompi_dpm.finalize != NULL ) {
        ompi_dpm.finalize();
    }

    /* Release whatever components are still open */
    mca_base_components_close(ompi_dpm_base_output, &ompi_dpm_base_components_available, NULL);

    return OMPI_SUCCESS;
}

279
ompi/mca/dpm/base/dpm_base_common_fns.c Обычный файл
Просмотреть файл

@ -0,0 +1,279 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 University of Houston. All rights reserved.
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <stdio.h>
#include "ompi/request/request.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/dpm/base/base.h"
/*
 * Look up the port name of the parent job, if there is one.
 *
 * Returns the value of the OMPI_PARENT_PORT environment variable, or NULL
 * when the variable is not set. The pointer comes straight from getenv():
 * it refers to the process environment and must not be freed.
 */
char* ompi_dpm_base_dyn_init (void)
{
    /* The variable name is a constant, so it is passed directly. Building
       a heap copy of it first added an allocation whose failure was never
       checked before the result was used and freed. */
    return getenv("OMPI_PARENT_PORT");
}
/**********************************************************************/
/**********************************************************************/
/**********************************************************************/
/* This routine runs through the list of communicators and does the
   disconnect for all dynamic communicators.

   Returns OMPI_SUCCESS when there was nothing to do or the disconnects
   were waited for, OMPI_ERR_OUT_OF_RESOURCE when the handle array cannot
   be allocated, and OMPI_ERROR when the number of dynamic communicators
   found does not match the expected count. */
int ompi_dpm_base_dyn_finalize (void)
{
    int i, j=0, max=0;
    int capacity;
    ompi_dpm_base_disconnect_obj **objs=NULL;
    ompi_communicator_t *comm=NULL;

    if ( 1 <ompi_comm_num_dyncomm ) {
        /* The consistency check below only succeeds with
           ompi_comm_num_dyncomm+1 handles, so the array needs room for
           that many; sizing it to ompi_comm_num_dyncomm meant the last
           handle was written past the end of the allocation. */
        capacity = ompi_comm_num_dyncomm + 1;
        objs = (ompi_dpm_base_disconnect_obj **)malloc ((size_t)capacity *
                               sizeof(ompi_dpm_base_disconnect_obj*));
        if ( NULL == objs ) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        max = opal_pointer_array_get_size(&ompi_mpi_communicators);
        /* NOTE(review): the scan starts at index 3, presumably to skip
           the predefined communicators -- confirm */
        for ( i=3; i<max; i++ ) {
            comm = (ompi_communicator_t*)opal_pointer_array_get_item(&ompi_mpi_communicators,i);
            /* a table slot may hold no communicator */
            if ( NULL == comm ) {
                continue;
            }
            if ( OMPI_COMM_IS_DYNAMIC(comm)) {
                /* More dynamic communicators than expected: this is the
                   mismatch reported below, detected before the array is
                   overrun.
                   NOTE(review): handles created so far are not released
                   on the error paths (unchanged behavior). */
                if ( j >= capacity ) {
                    free (objs);
                    return OMPI_ERROR;
                }
                objs[j++]=ompi_dpm_base_disconnect_init(comm);
            }
        }

        /* NOTE(review): the check expects ompi_comm_num_dyncomm+1 handles
           while only ompi_comm_num_dyncomm are waited for below; one of
           the two looks off by one -- confirm against how the counter is
           initialized and maintained. */
        if ( j != ompi_comm_num_dyncomm+1 ) {
            free (objs);
            return OMPI_ERROR;
        }

        ompi_dpm_base_disconnect_waitall (ompi_comm_num_dyncomm, objs);
        free (objs);
    }

    return OMPI_SUCCESS;
}
/* The next two routines implement a kind of non-blocking barrier.
The only difference is that you can wait for the completion
of more than one initiated ibarrier. This is required for waiting
for all still-connected processes in MPI_Finalize.
ompi_dpm_base_disconnect_init returns a handle, which has to be passed
to ompi_dpm_base_disconnect_waitall. The second routine blocks until
all non-blocking barriers described by the handles are finished.
The communicators can then be released.
*/
/**********************************************************************/
/**********************************************************************/
/**********************************************************************/
/*
 * Start the disconnect handshake on a communicator.
 *
 * For every peer (the remote group of an inter-communicator, otherwise
 * the communicator's own processes) a zero-length receive and a
 * zero-length synchronous send are posted. The resulting requests are
 * stored in the returned handle: slot 2*peer holds the receive, slot
 * 2*peer+1 the send.
 *
 * Returns the handle, or NULL if an allocation or one of the posts fails.
 */
ompi_dpm_base_disconnect_obj *ompi_dpm_base_disconnect_init ( ompi_communicator_t *comm)
{
    ompi_dpm_base_disconnect_obj *handle;
    int peer;
    int rc;

    handle = (ompi_dpm_base_disconnect_obj *) calloc(1,sizeof(ompi_dpm_base_disconnect_obj));
    if ( NULL == handle ) {
        return NULL;
    }

    handle->size = OMPI_COMM_IS_INTER(comm) ? ompi_comm_remote_size (comm)
                                            : ompi_comm_size (comm);
    handle->comm = comm;

    handle->reqs = (ompi_request_t **) malloc(2*handle->size*sizeof(ompi_request_t *));
    if ( NULL == handle->reqs ) {
        free (handle);
        return NULL;
    }

    /* The messages carry no data, so the dummy integer stored in the
       handle serves as buffer for every send and receive. */
    for ( peer=0; peer < handle->size; ++peer ) {
        rc = MCA_PML_CALL(irecv (&(handle->buf), 0, MPI_INT, peer,
                                 OMPI_COMM_BARRIER_TAG, comm,
                                 &(handle->reqs[2*peer])));
        if ( OMPI_SUCCESS == rc ) {
            rc = MCA_PML_CALL(isend (&(handle->buf), 0, MPI_INT, peer,
                                     OMPI_COMM_BARRIER_TAG,
                                     MCA_PML_BASE_SEND_SYNCHRONOUS,
                                     comm, &(handle->reqs[2*peer+1])));
        }

        /* either post failing abandons the whole handle */
        if ( OMPI_SUCCESS != rc ) {
            free (handle->reqs);
            free (handle);
            return NULL;
        }
    }

    return handle;
}
/**********************************************************************/
/**********************************************************************/
/**********************************************************************/
/* - count how many requests are active
 * - generate a request array large enough to hold
 *   all active requests
 * - call waitall on the overall request array
 * - free the objects
 *
 * count: number of handles in objs
 * objs:  handles returned by ompi_dpm_base_disconnect_init; each handle
 *        and its request array are freed here, the objs array itself is
 *        left to the caller
 *
 * On a NULL handle or an allocation failure a message is printed and the
 * routine returns without waiting, freeing, or touching the counter.
 */
void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs)
{
    ompi_request_t **reqs=NULL;
    ompi_request_t **next=NULL;
    int totalcount = 0;
    int i;

    for (i=0; i<count; i++) {
        if (NULL == objs[i]) {
            printf("Error in comm_disconnect_waitall\n");
            return;
        }

        totalcount += objs[i]->size;
    }

    /* every peer contributes two requests: one receive and one send */
    reqs = (ompi_request_t **) malloc (2*totalcount*sizeof(ompi_request_t *));
    if ( NULL == reqs ) {
        printf("ompi_comm_disconnect_waitall: error allocating memory\n");
        return;
    }

    /* generate a single, large array of pending requests */
    next = reqs;
    for (i=0; i<count; i++) {
        memcpy (next, objs[i]->reqs, 2*objs[i]->size * sizeof(ompi_request_t *));
        next += 2*objs[i]->size;
    }

    /* force all non-blocking all-to-alls to finish; the routine has no
       way to report a failure, so the status is deliberately dropped */
    (void) ompi_request_wait_all (2*totalcount, reqs, MPI_STATUSES_IGNORE);

    /* Finally, free everything. free(NULL) is a no-op, so the handle is
       released even if it carries no request array. */
    for (i=0; i< count; i++ ) {
        free (objs[i]->reqs );
        free (objs[i]);
    }

    free (reqs);

    /* decrease the counter for dynamic communicators by 'count'.
       Attention, this approach now requires, that we are just using
       these routines for communicators which have been flagged dynamic */
    ompi_comm_num_dyncomm -=count;

    return;
}
/**********************************************************************/
/**********************************************************************/
/**********************************************************************/
#define OMPI_DPM_BASE_MAXJOBIDS 64
void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm)
{
int i, j, numjobids=0;
int size, rsize;
int found;
orte_jobid_t jobids[OMPI_DPM_BASE_MAXJOBIDS], thisjobid;
ompi_group_t *grp=NULL;
ompi_proc_t *proc = NULL;
/* special case for MPI_COMM_NULL */
if ( comm == MPI_COMM_NULL ) {
return;
}
size = ompi_comm_size (comm);
rsize = ompi_comm_remote_size(comm);
/* loop over all processes in local group and count number
of different jobids. */
grp = comm->c_local_group;
for (i=0; i< size; i++) {
proc = ompi_group_peer_lookup(grp,i);
thisjobid = proc->proc_name.jobid;
found = 0;
for ( j=0; j<numjobids; j++) {
if (thisjobid == jobids[j]) {
found = 1;
break;
}
}
if (!found ) {
jobids[numjobids++] = thisjobid;
}
}
/* if inter-comm, loop over all processes in remote_group
and count number of different jobids */
grp = comm->c_remote_group;
for (i=0; i< rsize; i++) {
proc = ompi_group_peer_lookup(grp,i);
thisjobid = proc->proc_name.jobid;
found = 0;
for ( j=0; j<numjobids; j++) {
if ( thisjobid == jobids[j]) {
found = 1;
break;
}
}
if (!found ) {
jobids[numjobids++] = thisjobid;
}
}
/* if number of joibds larger than one, set the disconnect flag*/
if ( numjobids > 1 ) {
ompi_comm_num_dyncomm++;
OMPI_COMM_SET_DYNAMIC(comm);
}
return;
}

58
ompi/mca/dpm/base/dpm_base_open.c Обычный файл
Просмотреть файл

@ -0,0 +1,58 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/mca/dpm/base/base.h"
#include "ompi/mca/dpm/base/static-components.h"
/*
 * Globals
 */
/* Output stream id used for the framework's verbose/debug messages;
 * -1 until ompi_dpm_base_open() assigns the id from opal_output_open(). */
OMPI_DECLSPEC int ompi_dpm_base_output = -1;
/* Function table of the selected DPM module; ompi_dpm_base_select()
 * copies the winning module into it. */
OMPI_DECLSPEC ompi_dpm_base_module_t ompi_dpm;
/* List handed to mca_base_components_open() to receive the opened
 * "dpm" components. */
opal_list_t ompi_dpm_base_components_available;
/* Copy of the component chosen by ompi_dpm_base_select(). */
ompi_dpm_base_component_t ompi_dpm_base_selected_component;
/**
 * Open the DPM framework: create its output stream and open either all
 * available "dpm" MCA components or the one specifically requested via
 * an MCA parameter.
 *
 * @return OMPI_SUCCESS if the components could be opened,
 *         OMPI_ERROR otherwise.
 */
int ompi_dpm_base_open(void)
{
    int open_rc;

    /* Debugging/Verbose output */
    ompi_dpm_base_output = opal_output_open(NULL);

    /* Open up all available components */
    open_rc = mca_base_components_open("dpm",
                                       ompi_dpm_base_output,
                                       mca_dpm_base_static_components,
                                       &ompi_dpm_base_components_available,
                                       true);

    return (OPAL_SUCCESS == open_rc) ? OMPI_SUCCESS : OMPI_ERROR;
}

138
ompi/mca/dpm/base/dpm_base_select.c Обычный файл
Просмотреть файл

@ -0,0 +1,138 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/base/mca_base_component_repository.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/mca/dpm/base/base.h"
int ompi_dpm_base_select(void)
{
opal_list_item_t *item;
mca_base_component_list_item_t *cli;
ompi_dpm_base_component_t *component, *best_component = NULL;
ompi_dpm_base_module_t *module, *best_module = NULL;
int priority, best_priority = -1;
int rc;
/* Query all the opened components and see if they want to run */
for (item = opal_list_get_first(&ompi_dpm_base_components_available);
opal_list_get_end(&ompi_dpm_base_components_available) != item;
item = opal_list_get_next(item)) {
cli = (mca_base_component_list_item_t *) item;
component = (ompi_dpm_base_component_t *) cli->cli_component;
OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output,
"ompi:dpm:base:select: querying component %s",
component->dpm_version.mca_component_name));
/* Call the component's init function and see if it wants to be
selected */
module = component->dpm_init(&priority);
/* If we got a non-NULL module back, then the component wants
to be considered for selection */
if (NULL != module) {
/* If this is the best one, save it */
if (priority > best_priority) {
/* If there was a previous best one, finalize */
if (NULL != best_module) {
OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output,
"ompi:dpm:base:select: found better component - finalizing component %s",
best_component->dpm_version.mca_component_name));
best_module->finalize();
}
/* Save the new best one */
best_module = module;
best_component = component;
/* update the best priority */
best_priority = priority;
} else {
OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output,
"ompi:dpm:base:select: component %s does did not win the election",
component->dpm_version.mca_component_name));
if (NULL == module->finalize) {
opal_output(ompi_dpm_base_output,
"It appears you are the victim of a stale library - please delete your installation lib directory and reinstall");
} else {
module->finalize();
}
}
}
}
/* If we didn't find one to select, barf */
if (NULL == best_component) {
return OMPI_ERROR;
}
OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output,
"ompi:dpm:base:select: component %s was selected",
best_component->dpm_version.mca_component_name));
/* We have happiness -- save the component and module for later
usage */
ompi_dpm = *best_module;
ompi_dpm_base_selected_component = *best_component;
/* unload all components that were not selected */
item = opal_list_get_first(&ompi_dpm_base_components_available);
while(item != opal_list_get_end(&ompi_dpm_base_components_available)) {
opal_list_item_t* next = opal_list_get_next(item);
ompi_dpm_base_component_t* component;
cli = (mca_base_component_list_item_t *) item;
component = (ompi_dpm_base_component_t *) cli->cli_component;
if(component != best_component) {
OPAL_OUTPUT_VERBOSE((10, ompi_dpm_base_output,
"ompi:dpm:base:select: module %s unloaded",
component->dpm_version.mca_component_name));
mca_base_component_repository_release((mca_base_component_t *) component);
opal_list_remove_item(&ompi_dpm_base_components_available, item);
OBJ_RELEASE(item);
}
item = next;
}
/* init the selected module */
if (NULL != ompi_dpm.init) {
if (OMPI_SUCCESS != (rc = ompi_dpm.init())) {
return rc;
}
}
return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,22 +1,20 @@
# -*- shell-script -*-
-*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
#
# Additional copyrights may follow
#
#
# $HEADER$
#
PARAM_CONFIG_FILES="Makefile"
# This is the US/English general help file for OMPI DPM framework.
#

189
ompi/mca/dpm/dpm.h Обычный файл
Просмотреть файл

@ -0,0 +1,189 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Dynamic Process Management Interface
*
*/
#ifndef OMPI_MCA_DPM_H
#define OMPI_MCA_DPM_H
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/class/opal_object.h"
#include "ompi/info/info.h"
#include "ompi/communicator/communicator.h"
BEGIN_C_DECLS
/* OMPI port definitions
 *
 * RML tags used by the OMPI layer, numbered upwards from the end of
 * the ORTE tag range.  Every replacement list is parenthesized so the
 * tags keep their value inside larger expressions (for example
 * 2 * OMPI_RML_TAG_UDAPL or OMPI_RML_TAG_DYNAMIC - OMPI_COMM_JOIN_TAG).
 */
#define OMPI_RML_TAG_BASE                    (ORTE_RML_TAG_MAX)
#define OMPI_RML_TAG_UDAPL                   (OMPI_RML_TAG_BASE+1)
#define OMPI_RML_TAG_OPENIB                  (OMPI_RML_TAG_BASE+2)
#define OMPI_RML_TAG_XOPENIB                 (OMPI_RML_TAG_BASE+3)
#define OMPI_RML_TAG_COMM_CID_INTRA          (OMPI_RML_TAG_BASE+4)
#define OMPI_RML_TAG_XOOB                    (OMPI_RML_TAG_BASE+5)
#define OMPI_RML_TAG_SM_BACK_FILE_CREATED    (OMPI_RML_TAG_BASE+6)
#define OMPI_RML_TAG_WIREUP                  (OMPI_RML_TAG_BASE+7)
#define OMPI_CRCP_COORD_BOOKMARK_TAG         (OMPI_RML_TAG_BASE+8)
#define OMPI_COMM_JOIN_TAG                   (OMPI_RML_TAG_BASE+9)
/* first tag handed out for dynamically opened ports */
#define OMPI_RML_TAG_DYNAMIC                 (OMPI_RML_TAG_BASE+200)
/*
 * Initialize a module. Returns an int status code.
 */
typedef int (*ompi_dpm_base_module_init_fn_t)(void);
/*
 * Connect/accept communications
 *
 * Builds a new communicator (returned through newcomm) between the
 * processes of comm and a remote group reached through port.  root is
 * the rank in comm that talks to the remote side, send_first selects
 * whether this side sends or receives first during the exchange, and
 * tag is the RML tag used for it.  Returns an int status code.
 */
typedef int (*ompi_dpm_base_module_connect_accept_fn_t)(ompi_communicator_t *comm, int root,
orte_process_name_t *port, bool send_first,
ompi_communicator_t **newcomm, orte_rml_tag_t tag);
/**
 * Disconnect the given communicator.
 *
 * NOTE(review): the earlier text here said this "executes internally a
 * disconnect on all dynamic communicators in case the user did not
 * disconnect them", but the signature takes a single communicator --
 * confirm which contract is intended.
 */
typedef void (*ompi_dpm_base_module_disconnect_fn_t)(ompi_communicator_t *comm);
/*
 * Dynamically spawn processes
 *
 * The arrays hold one entry per application (count entries): command,
 * argv, maximum number of processes and info object.  port_name is the
 * port of the spawning side -- presumably the one the children connect
 * back to; confirm against the component implementation.
 */
typedef int (*ompi_dpm_base_module_spawn_fn_t)(int count, char **array_of_commands,
char ***array_of_argv,
int *array_of_maxprocs,
MPI_Info *array_of_info,
char *port_name);
/*
 * This routine checks, whether an application has been spawned
 * by another MPI application, or has been independently started.
 * If it has been spawned, it establishes the parent communicator.
 * Since the routine has to communicate, it should be among the last
 * steps in MPI_Init, to be sure that everything is already set up.
 */
typedef int (*ompi_dpm_base_module_dyn_init_fn_t)(void);
/*
 * Interface for mpi_finalize to call to ensure dynamically spawned procs
 * collectively finalize
 */
typedef int (*ompi_dpm_base_module_dyn_finalize_fn_t)(void);
/* this routine counts the number of different jobids of the processes
   given in a certain communicator. If there is more than one jobid,
   we mark the communicator as 'dynamic'. This is especially relevant
   for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have
   to wait for all still connected processes.
*/
typedef void (*ompi_dpm_base_module_mark_dyncomm_fn_t)(ompi_communicator_t *comm);
/*
 * Open a port to interface to a dynamically spawned job.
 * port_name is a caller-provided buffer that receives the port name
 * (presumably at least MPI_MAX_PORT_NAME bytes -- verify against callers).
 */
typedef int (*ompi_dpm_base_module_open_port_fn_t)(char *port_name);
/*
 * Parse a port name to get the contact info and tag.
 * The contact info is the return value, the tag is stored through tag.
 * NOTE(review): port_name is not const, so implementations may modify
 * it in place -- check the component before passing shared strings.
 */
typedef char* (*ompi_dpm_base_module_parse_port_fn_t)(char *port_name, orte_rml_tag_t *tag);
/*
 * Close a port
 */
typedef int (*ompi_dpm_base_module_close_port_fn_t)(char *port_name);
/*
 * Finalize a module
 */
typedef int (*ompi_dpm_base_module_finalize_fn_t)(void);
/**
 * Structure for DPM v1.0.0 modules
 *
 * Function table a DPM component hands back from its init query.  Each
 * member is a pointer of the matching ompi_dpm_base_module_*_fn_t type.
 */
struct ompi_dpm_base_module_1_0_0_t {
/** Initialization Function */
ompi_dpm_base_module_init_fn_t init;
/* connect/accept */
ompi_dpm_base_module_connect_accept_fn_t connect_accept;
/* disconnect */
ompi_dpm_base_module_disconnect_fn_t disconnect;
/* spawn processes */
ompi_dpm_base_module_spawn_fn_t spawn;
/* dyn_init */
ompi_dpm_base_module_dyn_init_fn_t dyn_init;
/* dyn_finalize */
ompi_dpm_base_module_dyn_finalize_fn_t dyn_finalize;
/* mark dyncomm */
ompi_dpm_base_module_mark_dyncomm_fn_t mark_dyncomm;
/* open port */
ompi_dpm_base_module_open_port_fn_t open_port;
/* parse port */
ompi_dpm_base_module_parse_port_fn_t parse_port;
/* close port */
ompi_dpm_base_module_close_port_fn_t close_port;
/* finalize */
ompi_dpm_base_module_finalize_fn_t finalize;
};
typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_1_0_0_t;
/* unversioned alias for the module structure */
typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_t;
/* the framework's module instance, declared here for use by other code */
OMPI_DECLSPEC extern ompi_dpm_base_module_t ompi_dpm;
/*
 * Component query function: returns the component's module (or,
 * presumably, NULL when the component does not want to be used) and
 * reports its selection priority through priority.
 */
typedef struct ompi_dpm_base_module_1_0_0_t*
(*ompi_dpm_base_component_init_fn_t)(int *priority);
/**
 * Structure for DPM v1.0.0 components.
 *
 * Starts with the generic MCA component and component-data structures
 * and adds the DPM-specific query function.
 */
struct ompi_dpm_base_component_1_0_0_t {
/** MCA base component */
mca_base_component_t dpm_version;
/** MCA base data */
mca_base_component_data_1_0_0_t dpm_data;
/* component selection: query function returning the module and its priority */
ompi_dpm_base_component_init_fn_t dpm_init;
};
typedef struct ompi_dpm_base_component_1_0_0_t ompi_dpm_base_component_1_0_0_t;
/* unversioned alias for the component structure */
typedef struct ompi_dpm_base_component_1_0_0_t ompi_dpm_base_component_t;
/**
 * Macro for use in components that are of type DPM v1.0.0
 *
 * Expands to a comma-separated initializer fragment: the MCA v1.0.0
 * version fields followed by the framework name "dpm" and its version
 * numbers 1, 0, 0.
 */
#define OMPI_DPM_BASE_VERSION_1_0_0 \
/* DPM v1.0 is chained to MCA v1.0 */ \
MCA_BASE_VERSION_1_0_0, \
/* DPM v1.0 */ \
"dpm", 1, 0, 0
END_C_DECLS
#endif /* OMPI_MCA_DPM_H */

40
ompi/mca/dpm/orte/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,40 @@
#
# Copyright (c) 2004-2006 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Help text shipped with the component.
dist_pkgdata_DATA = help-ompi-dpm-orte.txt
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of component_install / component_noinst is non-empty,
# depending on whether this component is built as a DSO.
if OMPI_BUILD_dpm_orte_DSO
component_noinst =
component_install = mca_dpm_orte.la
else
component_noinst = libmca_dpm_orte.la
component_install =
endif
# Sources shared by the DSO and the static variant of the component.
local_sources = \
dpm_orte.c \
dpm_orte.h \
dpm_orte_component.c
# DSO build: library listed in component_install, placed in $(pkglibdir).
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_dpm_orte_la_SOURCES = $(local_sources)
mca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS)
# Static build: library listed in component_noinst, not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_dpm_orte_la_SOURCES = $(local_sources)
libmca_dpm_orte_la_LIBADD = $(dpm_orte_LIBS)
libmca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS)

920
ompi/mca/dpm/orte/dpm_orte.c Обычный файл
Просмотреть файл

@ -0,0 +1,920 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "opal/util/opal_getcwd.h"
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/plm/plm.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/routed.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_data_server.h"
#include "ompi/communicator/communicator.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/info/info.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/dpm/base/base.h"
#include "dpm_orte.h"
/* Local static variables */
/* mutex taken by open_port() while it builds a port name and advances next_tag */
static opal_mutex_t ompi_dpm_port_mutex;
/* RML tag that the next opened port will carry */
static orte_rml_tag_t next_tag;

/*
 * Init the module: port tags start at OMPI_RML_TAG_DYNAMIC and the
 * port mutex is constructed.  Always returns OMPI_SUCCESS.
 */
static int init(void)
{
    next_tag = OMPI_RML_TAG_DYNAMIC;
    OBJ_CONSTRUCT(&ompi_dpm_port_mutex, opal_mutex_t);

    return OMPI_SUCCESS;
}
static int get_rport (orte_process_name_t *port,
                      int send_first, struct ompi_proc_t *proc,
                      orte_rml_tag_t tag, orte_process_name_t *rport);

/*
 * Build an inter-communicator between the processes of comm and a
 * remote group reachable through port.
 *
 * The root rank exchanges the size and process list of the local group
 * with the remote root over the RML, broadcasts the remote list to the
 * other ranks of comm, and every rank then constructs and activates
 * the new communicator.
 *
 * @param comm        local communicator
 * @param root        rank in comm that talks to the remote side
 * @param port        name of the remote contact process
 * @param send_first  true: send before receiving in every exchange
 * @param newcomm     receives the new communicator; MPI_COMM_NULL on
 *                    failure
 * @param tag         RML tag of the exchange; with OMPI_COMM_JOIN_TAG
 *                    port is used directly as the remote root
 * @return OMPI_SUCCESS or an error code
 *
 * Every failure leaves through the common exit path, so the progress
 * event-user count is always restored and the temporary buffers are
 * released.
 *
 * NOTE(review): when the root fails before the first broadcast, the
 * other ranks of comm are still waiting in that broadcast; this is
 * unchanged from earlier behavior.
 */
static int connect_accept ( ompi_communicator_t *comm, int root,
                            orte_process_name_t *port, bool send_first,
                            ompi_communicator_t **newcomm, orte_rml_tag_t tag )
{
    int size, rsize, rank, rc;
    orte_std_cntr_t num_vals;
    orte_std_cntr_t rnamebuflen = 0;
    int rnamebuflen_int = 0;
    void *rnamebuf=NULL;

    ompi_communicator_t *newcomp=MPI_COMM_NULL;
    ompi_proc_t **rprocs=NULL;
    ompi_group_t *group=comm->c_local_group;
    orte_process_name_t *rport=NULL, tmp_port_name;
    opal_buffer_t *nbuf=NULL, *nrbuf=NULL;
    ompi_proc_t **proc_list=NULL, **new_proc_list;
    int i,j, new_proc_len;
    ompi_group_t *new_group_pointer;

    OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
                         "%s dpm:orte:connect_accept with port %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(port)));

    size = ompi_comm_size ( comm );
    rank = ompi_comm_rank ( comm );

    /* tell the progress engine to tick the event library more
       often, to make sure that the OOB messages get sent */
    opal_progress_event_users_increment();

    if ( rank == root ) {
        /* The process receiving first does not have yet the contact
           information of the remote process. Therefore, we have to
           exchange that.
        */
        if(!OMPI_GROUP_IS_DENSE(group)) {
            proc_list = (ompi_proc_t **) calloc (group->grp_proc_count,
                                                 sizeof (ompi_proc_t *));
            if (NULL == proc_list) {
                rc = OMPI_ERR_OUT_OF_RESOURCE;
                goto exit;
            }
            for(i=0 ; i<group->grp_proc_count ; i++) {
                proc_list[i] = ompi_group_peer_lookup(group,i);
            }
        }

        if ( OMPI_COMM_JOIN_TAG != tag ) {
            if(OMPI_GROUP_IS_DENSE(group)){
                rc = get_rport(port,send_first,
                               group->grp_proc_pointers[rank], tag,
                               &tmp_port_name);
            }
            else {
                rc = get_rport(port,send_first,
                               proc_list[rank], tag,
                               &tmp_port_name);
            }
            if (OMPI_SUCCESS != rc) {
                /* leave through the common exit path so the progress
                   counter is restored and proc_list is freed */
                goto exit;
            }
            rport = &tmp_port_name;
        } else {
            rport = port;
        }

        /* Generate the message buffer containing the number of processes and the list of
           participating processes */
        nbuf = OBJ_NEW(opal_buffer_t);
        if (NULL == nbuf) {
            rc = OMPI_ERROR;
            goto exit;
        }

        if (ORTE_SUCCESS != (rc = opal_dss.pack(nbuf, &size, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(rc);
            goto exit;
        }

        if(OMPI_GROUP_IS_DENSE(group)) {
            ompi_proc_pack(group->grp_proc_pointers, size, nbuf);
        }
        else {
            ompi_proc_pack(proc_list, size, nbuf);
        }

        nrbuf = OBJ_NEW(opal_buffer_t);
        if (NULL == nrbuf ) {
            rc = OMPI_ERROR;
            goto exit;
        }

        /* Exchange the number and the list of processes in the groups.
           NOTE(review): the results of the two transfers are not
           checked; a failed transfer only shows up when the received
           data is unpacked further down. */
        if ( send_first ) {
            rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
            rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0);
        } else {
            rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0);
            rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
        }

        if (ORTE_SUCCESS != (rc = opal_dss.unload(nrbuf, &rnamebuf, &rnamebuflen))) {
            ORTE_ERROR_LOG(rc);
            goto exit;
        }

        /* the payload now lives in rnamebuf; drop the emptied buffer
           object, a fresh one is created after the broadcast */
        OBJ_RELEASE(nrbuf);
        nrbuf = NULL;
    }

    /* First convert the size_t to an int so we can cast in the bcast to a void *
     * if we don't then we will get badness when using big vs little endian
     * THIS IS NO LONGER REQUIRED AS THE LENGTH IS NOW A STD_CNTR_T, WHICH
     * CORRELATES TO AN INT32
     */
    rnamebuflen_int = (int)rnamebuflen;

    /* bcast the buffer-length to all processes in the local comm */
    rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm,
                                  comm->c_coll.coll_bcast_module);
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }
    rnamebuflen = rnamebuflen_int;

    if ( rank != root ) {
        /* non root processes need to allocate the buffer manually */
        rnamebuf = (char *) malloc(rnamebuflen);
        if ( NULL == rnamebuf ) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            goto exit;
        }
    }

    /* bcast list of processes to all procs in local group
       and reconstruct the data. Note that proc_get_proclist
       adds processes, which were not known yet to our
       process pool.
    */
    rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm,
                                  comm->c_coll.coll_bcast_module);
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    nrbuf = OBJ_NEW(opal_buffer_t);
    if (NULL == nrbuf) {
        /* rc still holds the successful bcast result here, so set an
           error explicitly */
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit;
    }
    if ( ORTE_SUCCESS != ( rc = opal_dss.load(nrbuf, rnamebuf, rnamebuflen))) {
        ORTE_ERROR_LOG(rc);
        goto exit;
    }
    /* nrbuf owns the payload from here on */
    rnamebuf = NULL;

    num_vals = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(nrbuf, &rsize, &num_vals, OPAL_INT))) {
        ORTE_ERROR_LOG(rc);
        goto exit;
    }

    rc = ompi_proc_unpack(nrbuf, rsize, &rprocs, &new_proc_len, &new_proc_list);
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    /* If we added new procs, we need to do the modex and then call
       PML add_procs */
    if (new_proc_len > 0) {
        opal_list_t all_procs;
        orte_namelist_t *name;

        OBJ_CONSTRUCT(&all_procs, opal_list_t);

        /* the side that sends first lists its own procs first */
        if (send_first) {
            for (i = 0 ; i < group->grp_proc_count ; ++i) {
                name = OBJ_NEW(orte_namelist_t);
                name->name = ompi_group_peer_lookup(group, i)->proc_name;
                opal_list_append(&all_procs, &name->item);
            }
            for (i = 0 ; i < rsize ; ++i) {
                name = OBJ_NEW(orte_namelist_t);
                name->name = rprocs[i]->proc_name;
                opal_list_append(&all_procs, &name->item);
            }
        } else {
            for (i = 0 ; i < rsize ; ++i) {
                name = OBJ_NEW(orte_namelist_t);
                name->name = rprocs[i]->proc_name;
                opal_list_append(&all_procs, &name->item);
            }
            for (i = 0 ; i < group->grp_proc_count ; ++i) {
                name = OBJ_NEW(orte_namelist_t);
                name->name = ompi_group_peer_lookup(group, i)->proc_name;
                opal_list_append(&all_procs, &name->item);
            }
        }

        if (OMPI_SUCCESS != (rc = orte_grpcomm.modex(&all_procs))) {
            ORTE_ERROR_LOG(rc);
            goto exit;
        }

        /* NOTE(review): all_procs and its entries are never released;
           the list cleanup was disabled at this point and is left that
           way because it is not visible here whether the modex still
           references the entries -- confirm and restore. */

        MCA_PML_CALL(add_procs(new_proc_list, new_proc_len));
    }

    OBJ_RELEASE(nrbuf);
    nrbuf = NULL;
    if ( rank == root ) {
        OBJ_RELEASE(nbuf);
        nbuf = NULL;
    }

    new_group_pointer=ompi_group_allocate(rsize);
    if( NULL == new_group_pointer ) {
        rc = MPI_ERR_GROUP;
        goto exit;
    }

    /* put group elements in the list */
    for (j = 0; j < rsize; j++) {
        new_group_pointer->grp_proc_pointers[j] = rprocs[j];
    }                           /* end proc loop */

    /* increment proc reference counters */
    ompi_group_increment_proc_count(new_group_pointer);

    /* set up communicator structure */
    rc = ompi_comm_set ( &newcomp,                 /* new comm */
                         comm,                     /* old comm */
                         group->grp_proc_count,    /* local_size */
                         NULL,                     /* local_procs */
                         rsize,                    /* remote_size */
                         NULL  ,                   /* remote_procs */
                         NULL,                     /* attrs */
                         comm->error_handler,      /* error handler */
                         NULL,                     /* topo component */
                         group,                    /* local group */
                         new_group_pointer         /* remote group */
                         );
    if ( NULL == newcomp ) {
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit;
    }

    ompi_group_decrement_proc_count (new_group_pointer);
    OBJ_RELEASE(new_group_pointer);
    new_group_pointer = MPI_GROUP_NULL;

    /* allocate comm_cid */
    rc = ompi_comm_nextcid ( newcomp,                 /* new communicator */
                             comm,                    /* old communicator */
                             NULL,                    /* bridge comm */
                             &root,                   /* local leader */
                             rport,                   /* remote leader */
                             OMPI_COMM_CID_INTRA_OOB, /* mode */
                             send_first );            /* send or recv first */
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    /* activate comm and init coll-component */
    rc = ompi_comm_activate ( newcomp,                 /* new communicator */
                              comm,                    /* old communicator */
                              NULL,                    /* bridge comm */
                              &root,                   /* local leader */
                              rport,                   /* remote leader */
                              OMPI_COMM_CID_INTRA_OOB, /* mode */
                              send_first,              /* send or recv first */
                              0);                      /* sync_flag */
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    /* Question: do we have to re-start some low level stuff
       to enable the usage of fast communication devices
       between the two worlds ?
    */

 exit:
    /* done with OOB and such - slow our tick rate again */
    opal_progress();
    opal_progress_event_users_decrement();

    /* release whatever an error path left behind; all three are NULL
       after a successful run */
    if ( NULL != nbuf ) {
        OBJ_RELEASE(nbuf);
    }
    if ( NULL != nrbuf ) {
        OBJ_RELEASE(nrbuf);
    }
    if ( NULL != rnamebuf ) {
        free ( rnamebuf );
    }

    if ( NULL != rprocs ) {
        free ( rprocs );
    }
    if ( NULL != proc_list ) {
        free ( proc_list );
    }
    if ( OMPI_SUCCESS != rc ) {
        if ( MPI_COMM_NULL != newcomp && NULL != newcomp ) {
            /* NOTE(review): an error path would normally release the
               half-built communicator rather than retain it; left
               unchanged pending confirmation */
            OBJ_RETAIN(newcomp);
            newcomp = MPI_COMM_NULL;
        }
    }

    *newcomm = newcomp;
    return rc;
}
/*
 * Disconnect a single communicator: start the disconnect handshake and
 * wait for it to complete.
 */
static void disconnect(ompi_communicator_t *comm)
{
    ompi_dpm_base_disconnect_obj *handle = ompi_dpm_base_disconnect_init (comm);

    ompi_dpm_base_disconnect_waitall(1, &handle);
}
/**********************************************************************/
/**********************************************************************/
/**********************************************************************/
/*
 * Exchange the name of the remote root process.
 *
 * In the connect/accept case the connecting processes know the OOB
 * contact information of the remote leader, while the accepting
 * processes are only handed their own port name, which is of no use
 * for addressing the peer.  The two roots therefore exchange names
 * here:
 *
 *  - send_first != 0: pack the name of proc, send it to port, and
 *    report port itself as the remote root;
 *  - send_first == 0: receive a message with the given tag from any
 *    sender and report the name unpacked from it.
 *
 * The remote root is stored in *rport_name.  Returns OMPI_SUCCESS or
 * an error code.
 */
int get_rport(orte_process_name_t *port, int send_first,
              ompi_proc_t *proc, orte_rml_tag_t tag,
              orte_process_name_t *rport_name)
{
    int rc;
    orte_std_cntr_t num_vals;
    opal_buffer_t *msg;

    if ( send_first ) {
        OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
                             "%s dpm:orte:get_rport sending to %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(port)));

        msg = OBJ_NEW(opal_buffer_t);
        if (NULL == msg) {
            return OMPI_ERROR;
        }

        rc = opal_dss.pack(msg, &(proc->proc_name), 1, ORTE_NAME);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }

        /* the buffer is no longer needed once the send call returns */
        rc = orte_rml.send_buffer(port, msg, tag, 0);
        OBJ_RELEASE(msg);
        if ( rc < 0 ) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        *rport_name = *port;
        return OMPI_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
                         "%s dpm:orte:get_rport waiting to recv",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    msg = OBJ_NEW(opal_buffer_t);
    if (NULL == msg) {
        return ORTE_ERROR;
    }

    rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, msg, tag, 0);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }

    num_vals = 1;
    rc = opal_dss.unpack(msg, rport_name, &num_vals, ORTE_NAME);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }
    OBJ_RELEASE(msg);

    OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
                         "%s dpm:orte:get_rport recv'd name %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(rport_name)));

    return OMPI_SUCCESS;
}
/*
 * Spawn a new job consisting of count applications.
 *
 * One app context is built per command: executable name, argv, an
 * environment holding OMPI_PARENT_PORT=<port_name> plus every OMPI_*
 * variable of the caller, and the settings found in the matching info
 * object.  The job is then handed to the PLM for launch.
 *
 * @param count              number of applications
 * @param array_of_commands  executable of each application
 * @param array_of_argv      arguments of each application, or
 *                           MPI_ARGVS_NULL
 * @param array_of_maxprocs  number of processes of each application
 * @param array_of_info      info object of each application, may be
 *                           NULL
 * @param port_name          port passed to the children through the
 *                           OMPI_PARENT_PORT environment variable
 * @return OMPI_SUCCESS, MPI_ERR_SPAWN if the launch fails, or an ORTE
 *         error code if the job description cannot be built
 *
 * Info values are read into fixed-size buffers; ompi_info_get is given
 * one byte less than the buffer size so that the terminating NUL always
 * fits.
 */
static int spawn(int count, char **array_of_commands,
                 char ***array_of_argv,
                 int *array_of_maxprocs,
                 MPI_Info *array_of_info,
                 char *port_name)
{
    int rc, i, j, counter;
    int have_wdir=0;
    bool have_prefix;
    int flag=0;
    char cwd[OMPI_PATH_MAX];
    char host[OMPI_PATH_MAX];  /*** should define OMPI_HOST_MAX ***/
    char prefix[OMPI_PATH_MAX];
    char *base_prefix=NULL;

    orte_job_t *jdata;
    orte_std_cntr_t dummy;
    orte_app_context_t *app;

    /* NOTE(review): nothing in this function ever enables timing or
       assigns base_prefix, so the code depending on them is currently
       inactive */
    bool timing = false;
    struct timeval ompistart, ompistop;

    /* parse the info object */
    /* check potentially for:
       - "host": desired host where to spawn the processes
       - "hostfile": hostfile containing hosts where procs are
       to be spawned
       - "add-host": add the specified hosts to the known list
       of available resources and spawn these
       procs on them
       - "add-hostfile": add the hosts in the hostfile to the
       known list of available resources and spawn
       these procs on them
       - "prefix": the path to the root of the directory tree where ompi
       executables and libraries can be found on all nodes
       used to spawn these procs
       - "arch": desired architecture
       - "wdir": directory, where executable can be found
       - "path": list of directories where to look for the executable
       - "file": filename, where additional information is provided.
       - "soft": see page 92 of MPI-2.
    */

    /* make sure the progress engine properly trips the event library */
    opal_progress_event_users_increment();

    /* setup the job object */
    jdata = OBJ_NEW(orte_job_t);
    if (NULL == jdata) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        opal_progress_event_users_decrement();
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* Convert the list of commands to an array of orte_app_context_t
       pointers */
    for (i = 0; i < count; ++i) {
        app = OBJ_NEW(orte_app_context_t);
        if (NULL == app) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            OBJ_RELEASE(jdata);
            opal_progress_event_users_decrement();
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        /* add the app to the job data */
        orte_pointer_array_add(&dummy, jdata->apps, app);
        jdata->num_apps++;

        /* copy over the name of the executable */
        app->app = strdup(array_of_commands[i]);
        if (NULL == app->app) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            OBJ_RELEASE(jdata);
            opal_progress_event_users_decrement();
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        /* record the number of procs to be generated */
        app->num_procs = array_of_maxprocs[i];

        /* copy over the argv array */
        counter = 1;

        if (MPI_ARGVS_NULL != array_of_argv &&
            MPI_ARGV_NULL != array_of_argv[i]) {
            /* first need to find out how many entries there are */
            j=0;
            while (NULL != array_of_argv[i][j]) {
                j++;
            }
            counter += j;
        }

        /* now copy them over, ensuring to NULL terminate the array */
        app->argv = (char**)malloc((1 + counter) * sizeof(char*));
        if (NULL == app->argv) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            OBJ_RELEASE(jdata);
            opal_progress_event_users_decrement();
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        app->argv[0] = strdup(array_of_commands[i]);
        for (j=1; j < counter; j++) {
            app->argv[j] = strdup(array_of_argv[i][j-1]);
        }
        app->argv[counter] = NULL;

        /* the environment gets set by the launcher
         * all we need to do is add the specific values
         * needed for comm_spawn
         */
        /* Add environment variable with the contact information for the
           child processes.
        */
        counter = 1;
        app->env = (char**)malloc((1+counter) * sizeof(char*));
        if (NULL == app->env) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            OBJ_RELEASE(jdata);
            opal_progress_event_users_decrement();
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        asprintf(&(app->env[0]), "OMPI_PARENT_PORT=%s", port_name);
        app->env[1] = NULL;
        for (j = 0; NULL != environ[j]; ++j) {
            if (0 == strncmp("OMPI_", environ[j], 5)) {
                opal_argv_append_nosize(&app->env, environ[j]);
            }
        }

        /* Check for well-known info keys */
        have_wdir = 0;
        have_prefix = false;
        if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) {

            /* check for 'host' */
            ompi_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag);
            if ( flag ) {
                app->num_map = 1;
                app->map_data = (orte_app_context_map_t **) malloc(sizeof(orte_app_context_map_t *));
                app->map_data[0] = OBJ_NEW(orte_app_context_map_t);
                app->map_data[0]->map_type = ORTE_APP_CONTEXT_MAP_HOSTNAME;
                app->map_data[0]->map_data = strdup(host);
            }

            /* check for 'hostfile' */
            ompi_info_get (array_of_info[i], "hostfile", sizeof(host) - 1, host, &flag);
            if ( flag ) {
                app->hostfile = strdup(host);
            }

            /* check for 'add-host' */
            ompi_info_get (array_of_info[i], "add-host", sizeof(host) - 1, host, &flag);
            if ( flag ) {
                app->num_map = 1;
                app->map_data = (orte_app_context_map_t **) malloc(sizeof(orte_app_context_map_t *));
                app->map_data[0] = OBJ_NEW(orte_app_context_map_t);
                app->map_data[0]->map_type = ORTE_APP_CONTEXT_MAP_ADD_HOSTNAME;
                app->map_data[0]->map_data = strdup(host);
            }

            /* check for 'add-hostfile' */
            ompi_info_get (array_of_info[i], "add-hostfile", sizeof(host) - 1, host, &flag);
            if ( flag ) {
                app->add_hostfile = strdup(host);
            }

            /* 'path', 'arch', 'file', 'soft'  -- to be implemented */

            /* check for 'ompi_prefix' (OMPI-specific -- to effect the same
             * behavior as --prefix option to orterun)
             */
            ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag);
            if ( flag ) {
                app->prefix_dir = strdup(prefix);
                have_prefix = true;
            }

            /* check for 'wdir' */
            ompi_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag);
            if ( flag ) {
                app->cwd = strdup(cwd);
                have_wdir = 1;
            }

            /* check for 'ompi_local_slave' - OMPI-specific -- indicates that
             * the specified app is to be launched by the local orted as a
             * "slave" process, typically to support an attached co-processor
             */
            ompi_info_get_bool(array_of_info[i], "ompi_local_slave", &jdata->local_spawn, &flag);
        }

        /* default value: If the user did not tell us where to look for the
           executable, we assume the current working directory */
        if ( !have_wdir ) {
            if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OMPI_PATH_MAX))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(jdata);
                opal_progress_event_users_decrement();
                return rc;
            }
            app->cwd = strdup(cwd);
        }

        /* if the user told us a new prefix, then we leave it alone. otherwise, if
         * a prefix had been provided before, copy that one into the new app_context
         * for use by the spawned children
         */
        if ( !have_prefix && NULL != base_prefix) {
            app->prefix_dir = strdup(base_prefix);
        }

        /* leave the map info alone - the launcher will
         * decide where to put things
         */
    } /* for (i = 0 ; i < count ; ++i) */

    /* cleanup */
    if (NULL != base_prefix) {
        free(base_prefix);
    }

    /* check for timing request - get stop time and report elapsed time if so */
    if (timing) {
        if (0 != gettimeofday(&ompistop, NULL)) {
            opal_output(0, "ompi_comm_start_procs: could not obtain stop time");
        } else {
            opal_output(0, "ompi_comm_start_procs: time from start to prepare to spawn %ld usec",
                        (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
                                   (ompistop.tv_usec - ompistart.tv_usec)));
            if (0 != gettimeofday(&ompistart, NULL)) {
                opal_output(0, "ompi_comm_start_procs: could not obtain new start time");
                ompistart.tv_sec = ompistop.tv_sec;
                ompistart.tv_usec = ompistop.tv_usec;
            }
        }
    }

    /* spawn procs */
    rc = orte_plm.spawn(jdata);
    OBJ_RELEASE(jdata);

    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        opal_progress_event_users_decrement();
        return MPI_ERR_SPAWN;
    }

    /* check for timing request - get stop time and report elapsed time if so */
    if (timing) {
        if (0 != gettimeofday(&ompistop, NULL)) {
            opal_output(0, "ompi_comm_start_procs: could not obtain stop time");
        } else {
            opal_output(0, "ompi_comm_start_procs: time to spawn %ld usec",
                        (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
                                   (ompistop.tv_usec - ompistart.tv_usec)));
        }
    }

    /* clean up */
    opal_progress_event_users_decrement();

    return OMPI_SUCCESS;
}
/*
 * Build a port name of the form "<RML contact info>:<tag>" and store it in
 * port_name. The tag is appended so that every port opened by this process
 * gets a distinct name, even in multi-threaded scenarios.
 *
 * port_name  Caller-provided buffer that receives the port name.
 *            NOTE(review): assumed to hold at least MPI_MAX_PORT_NAME bytes -
 *            the length check below relies on it; confirm against callers.
 *
 * Returns OMPI_SUCCESS on success, OMPI_ERR_NOT_AVAILABLE if no RML contact
 * info could be obtained, or OMPI_ERROR if the contact info cannot be
 * shortened enough to fit.
 */
static int open_port(char *port_name)
{
    char *rml_uri, *ptr, tag[12];
    int attempts = 0;
    int rc = OMPI_SUCCESS;

    if (NULL == (rml_uri = orte_rml.get_contact_info())) {
        return OMPI_ERR_NOT_AVAILABLE;
    }

    /* Hold the mutex from the moment next_tag is read until it has been
     * incremented; otherwise two threads could read the same value and
     * produce identical port names.
     */
    OPAL_THREAD_LOCK(&ompi_dpm_port_mutex);

    snprintf(tag, sizeof(tag), "%d", (int)next_tag);

    /* if the overall port name is too long, we try to truncate the rml uri:
     * uri + ':' + tag + '\0' has to fit into MPI_MAX_PORT_NAME bytes
     */
    while ((strlen(rml_uri) + strlen(tag)) > (MPI_MAX_PORT_NAME - 2)) {
        /* if we have already tried several times, punt! */
        if (4 < attempts) {
            rc = OMPI_ERROR;
            goto cleanup;
        }
        /* find the trailing uri and truncate there; if there is no further
         * ';' separator the uri cannot be shortened any more
         */
        ptr = strrchr(rml_uri, ';');
        if (NULL == ptr) {
            rc = OMPI_ERROR;
            goto cleanup;
        }
        *ptr = '\0';
        ++attempts;
    }

    snprintf(port_name, MPI_MAX_PORT_NAME, "%s:%s", rml_uri, tag);
    /* the tag is only consumed when a port name was actually produced */
    next_tag++;

cleanup:
    OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex);
    free(rml_uri);
    return rc;
}
/* takes a port_name and separates it into the RML URI
* and the tag
*/
/*
 * Split a port name of the form "<RML URI>:<tag>" into its two parts.
 *
 * port_name  Port name to parse. NOTE: modified in place on success - the
 *            string is terminated at its last ':' and, if the remaining URI
 *            is longer than MPI_MAX_PORT_NAME characters, clipped to that
 *            length.
 * tag        Receives the RML tag parsed from the text after the last ':'.
 *
 * Returns a strdup'd copy of the RML URI which the caller must free(), or
 * NULL if an argument is NULL, there is no ':' separator, the tag is not a
 * decimal integer, or the copy could not be allocated. When the separator is
 * missing or the tag is malformed, neither port_name nor *tag is modified.
 */
static char *parse_port (char *port_name, orte_rml_tag_t *tag)
{
    char *separator;
    int tag_value;

    if (NULL == port_name || NULL == tag) {
        return NULL;
    }

    /* find the ':' marking the RML tag that was appended to the end */
    separator = strrchr(port_name, ':');
    if (NULL == separator) {
        return NULL;
    }

    /* convert the RML tag into a real int first (no pointer cast on the
     * caller's tag object) and refuse input that carries no number
     */
    if (1 != sscanf(separator + 1, "%d", &tag_value)) {
        return NULL;
    }
    *tag = (orte_rml_tag_t)tag_value;

    /* terminate the port_name at the separator, leaving only the RML uri */
    *separator = '\0';

    /* see if the RML uri is too long - if so, truncate it */
    if (strlen(port_name) > MPI_MAX_PORT_NAME) {
        port_name[MPI_MAX_PORT_NAME] = '\0';
    }

    /* hand back a malloc'd copy that can later be free'd */
    return strdup(port_name);
}
/*
 * Close a port. This implementation performs no work: the port name is
 * ignored and success is always reported.
 */
static int close_port(char *port_name)
{
    (void)port_name; /* intentionally unused */

    return OMPI_SUCCESS;
}
static int dyn_init(void)
{
char *oob_port=NULL;
char *port_name=NULL;
int root=0, rc;
bool send_first = true;
orte_rml_tag_t tag;
ompi_communicator_t *newcomm=NULL;
orte_process_name_t port_proc_name;
ompi_group_t *group = NULL;
ompi_errhandler_t *errhandler = NULL;
ompi_communicator_t *oldcomm;
/* if env-variable is set, we are a dynamically spawned
* child - parse port and call comm_connect_accept */
if (NULL == (port_name = ompi_dpm_base_dyn_init())) {
/* nothing to do */
return OMPI_SUCCESS;
}
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
"%s dpm:orte:dyn_init with port %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
port_name));
/* split the content of the environment variable into
its pieces, which are RML-uri:tag */
oob_port = parse_port (port_name, &tag);
/* set the contact info into the local hash table */
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(oob_port))) {
ORTE_ERROR_LOG(rc);
free(oob_port);
return(rc);
}
/* process the RML uri to get the port's process name */
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(oob_port, &port_proc_name, NULL))) {
ORTE_ERROR_LOG(rc);
free(oob_port);
return rc;
}
free(oob_port); /* done with this */
/* update the route to this process - in this case, we always give it
* as direct since we were given the contact info. We trust the
* selected routed component to do the Right Thing for its own mode
* of operation
*/
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&port_proc_name, &port_proc_name))) {
ORTE_ERROR_LOG(rc);
return rc;
}
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
"%s dpm:orte:dyn_init calling connect_accept to %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&port_proc_name)));
rc = connect_accept (MPI_COMM_WORLD, root, &port_proc_name,
send_first, &newcomm, tag );
if (OMPI_SUCCESS != rc) {
return rc;
}
/* Set the parent communicator */
ompi_mpi_comm_parent = newcomm;
/* originally, we set comm_parent to comm_null (in comm_init),
* now we have to decrease the reference counters to the according
* objects
*/
oldcomm = &ompi_mpi_comm_null;
OBJ_RELEASE(oldcomm);
group = &ompi_mpi_group_null;
OBJ_RELEASE(group);
errhandler = &ompi_mpi_errors_are_fatal;
OBJ_RELEASE(errhandler);
/* Set name for debugging purposes */
snprintf(newcomm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMM_PARENT");
return OMPI_SUCCESS;
}
/*
* finalize the module
*/
/*
 * Tear down the module: destruct the mutex that guards port-name creation.
 * Always reports success.
 */
static int finalize(void)
{
    int status = OMPI_SUCCESS;

    OBJ_DESTRUCT(&ompi_dpm_port_mutex);

    return status;
}
/*
* instantiate the module
*/
/* Function table of the ORTE dpm module. The initializer is positional, so
 * the entries must stay in the member order of ompi_dpm_base_module_t.
 */
ompi_dpm_base_module_t ompi_dpm_orte_module = {
init,
connect_accept,
disconnect,
spawn,
dyn_init, /* connects a dynamically spawned child to its parent */
ompi_dpm_base_dyn_finalize,
ompi_dpm_base_mark_dyncomm,
open_port, /* builds "<RML contact info>:<tag>" into the caller's buffer */
parse_port, /* splits a port name into RML uri (returned) and tag */
close_port, /* no-op, always succeeds */
finalize /* destructs the port mutex */
};

37
ompi/mca/dpm/orte/dpm_orte.h Обычный файл
Просмотреть файл

@ -0,0 +1,37 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_DPM_ORTE_H
#define OMPI_DPM_ORTE_H
#include "ompi_config.h"
#include "orte/types.h"
#include "ompi/mca/dpm/dpm.h"
BEGIN_C_DECLS
/* access to module: the function table of the ORTE dpm module */
extern ompi_dpm_base_module_t ompi_dpm_orte_module;
/* the component descriptor, defined in dpm_orte_component.c */
OMPI_MODULE_DECLSPEC extern ompi_dpm_base_component_t mca_dpm_orte_component;
END_C_DECLS
#endif /* OMPI_DPM_ORTE_H */

72
ompi/mca/dpm/orte/dpm_orte_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,72 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "dpm_orte.h"
/* Forward declarations of the component entry points referenced by the
 * mca_dpm_orte_component initializer below; all have internal linkage.
 */
static int dpm_orte_component_open(void);
static int dpm_orte_component_close(void);
static ompi_dpm_base_module_t* dpm_orte_component_init( int* priority );
/* Descriptor of the "orte" dpm component. The initializer is positional, so
 * the entries must stay in the member order of ompi_dpm_base_component_t.
 */
ompi_dpm_base_component_t mca_dpm_orte_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a dpm v1.0.0 component (which also implies
a specific MCA version) */
OMPI_DPM_BASE_VERSION_1_0_0,
"orte", /* MCA component name */
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
dpm_orte_component_open, /* component open */
dpm_orte_component_close /* component close */
},
/* Next the MCA v1.0.0 component meta data */
{
/* This component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
dpm_orte_component_init, /* component init: reports the priority and returns the module */
};
/*
 * Component open hook. There is nothing to set up, so this always returns
 * OMPI_SUCCESS. Declared static to match its forward declaration.
 */
static int dpm_orte_component_open(void)
{
    return OMPI_SUCCESS;
}
/*
 * Component close hook. There is nothing to release, so this always returns
 * OMPI_SUCCESS. Declared static to match its forward declaration.
 */
static int dpm_orte_component_close(void)
{
    return OMPI_SUCCESS;
}
/*
 * Component init hook.
 *
 * priority  Output: set to this component's priority (50).
 *
 * Returns a pointer to the ORTE dpm module. Declared static to match its
 * forward declaration.
 */
static ompi_dpm_base_module_t* dpm_orte_component_init(int* priority)
{
    *priority = 50;
    return &ompi_dpm_orte_module;
}

43
ompi/mca/dpm/orte/help-ompi-dpm-orte.txt Обычный файл
Просмотреть файл

@ -0,0 +1,43 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for the Open MPI dpm "orte" component.
#
[dpm-orte:no-server]
Process rank %ld attempted to %s a global ompi_server that
could not be contacted. This is typically caused by either not
specifying the contact info for the server, or by the server not
currently executing. If you did specify the contact info for a
server, please check to see that the server is running and start
it again (or have your sys admin start it) if it isn't.
[dpm-orte:unknown-order]
Process rank %ld attempted to look up a value but provided an
unrecognized order parameter. Order parameters are used to tell Open
MPI if it should first look for the requested value locally (i.e., from
the current job) or from a global ompi_server. Accepted order
parameters are "local" and "global", respectively.
[dpm-orte:too-many-orders]
Process rank %ld attempted to look up a value but provided too many
order parameters (%ld found). Order parameters are used to tell
Open MPI if it should first look for the requested value locally
(i.e., from the current job) or from a global ompi_server. Accepted
order parameters are "local" and "global", respectively, and each can
only be specified once.

Просмотреть файл

Просмотреть файл

@ -23,8 +23,8 @@
#include "opal/util/output.h"
#include "mpool_base_mem_cb.h"
#include "base.h"
#include "orte/types.h"
#include "orte/util/proc_info.h"
#include "orte/mca/ns/ns_types.h"
opal_pointer_array_t mca_mpool_base_mem_cb_array;

Просмотреть файл

@ -26,9 +26,10 @@
#include "opal/mca/mca.h"
#include "opal/util/show_help.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/util/name_fns.h"
#include "orte/util/proc_info.h"
#include "orte/util/sys_info.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/runtime/params.h"
#include "mpool_base_tree.h"
@ -172,13 +173,13 @@ void mca_mpool_base_tree_print(void)
if (num_leaks <= ompi_debug_show_mpi_alloc_mem_leaks ||
ompi_debug_show_mpi_alloc_mem_leaks < 0) {
opal_show_help("help-mpool-base.txt", "all mem leaks",
true, ORTE_NAME_PRINT(orte_process_info.my_name),
true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
orte_system_info.nodename,
orte_process_info.pid, leak_msg);
} else {
int i = num_leaks - ompi_debug_show_mpi_alloc_mem_leaks;
opal_show_help("help-mpool-base.txt", "some mem leaks",
true, ORTE_NAME_PRINT(orte_process_info.my_name),
true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
orte_system_info.nodename,
orte_process_info.pid, leak_msg, i,
(i > 1) ? "s were" : " was",

Просмотреть файл

@ -25,6 +25,8 @@
#include "ompi_config.h"
#include "opal/include/opal/align.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "opal/util/output.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include <errno.h>
@ -431,7 +433,7 @@ void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
if(true == mca_mpool_rdma_component.print_stats) {
opal_output(0, "%s rdma: stats "
"(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
ORTE_NAME_PRINT(orte_process_info.my_name),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
mpool_rdma->stat_cache_hit, mpool_rdma->stat_cache_miss,
mpool_rdma->stat_cache_found, mpool_rdma->stat_cache_notfound,
mpool_rdma->stat_evicted);

Просмотреть файл

@ -21,10 +21,6 @@
#include <sys/time.h>
#include <time.h>
#include "ompi/types.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "opal/util/output.h"
#include "mtl_mx.h"
#include "mtl_mx_types.h"

Просмотреть файл

@ -25,7 +25,11 @@
#include "opal/runtime/opal_progress.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/constants.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
@ -165,10 +169,10 @@ int mca_pml_base_select(bool enable_progress_threads,
if( NULL == tmp_val) {
continue;
}
orte_errmgr.error_detected(1, "PML %s cannot be selected", tmp_val, NULL);
orte_errmgr.abort(1, "PML %s cannot be selected", tmp_val);
}
if(0 == i) {
orte_errmgr.error_detected(2, "No pml component available. This shouldn't happen.", NULL);
orte_errmgr.abort(2, "No pml component available. This shouldn't happen.");
}
}

Просмотреть файл

@ -27,7 +27,6 @@
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/pml/crcpw/pml_crcpw.h"
#include "ompi/mca/bml/base/base.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/class/ompi_free_list.h"

Просмотреть файл

@ -34,7 +34,8 @@
#include "pml_dr_sendreq.h"
#include "pml_dr_recvreq.h"
#include "ompi/mca/bml/base/base.h"
#include "orte/mca/ns/ns.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/mca/pml/base/base.h"
@ -241,9 +242,9 @@ int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs)
/* this won't work for comm spawn and other dynamic
processes, but will work for initial job start */
idx = opal_pointer_array_add(&mca_pml_dr.endpoints, (void*) endpoint);
if(orte_ns.compare_fields(ORTE_NS_CMP_ALL,
orte_process_info.my_name,
&(endpoint->proc_ompi->proc_name)) == ORTE_EQUAL) {
if(orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
ORTE_PROC_MY_NAME,
&(endpoint->proc_ompi->proc_name)) == OPAL_EQUAL) {
mca_pml_dr.my_rank = idx;
}
endpoint->local = endpoint->dst = idx;

Просмотреть файл

@ -19,7 +19,6 @@
#include "ompi_config.h"
#include "pml_dr.h"
#include "pml_dr_endpoint.h"
#include "orte/mca/ns/ns.h"

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше