Commit of ORCA: Open MPI Runtime Collaborative Abstraction
This is a runtime interposition project that sits between the OMPI and ORTE layers in Open MPI. The project is described on the wiki (https://svn.open-mpi.org/trac/ompi/wiki/Runtime_Interposition) and in this email thread (http://www.open-mpi.org/community/lists/devel/2012/06/11109.php). This commit was SVN r26670.
Этот коммит содержится в:
родитель
d7787b625e
Коммит
542330e3a7
2
VERSION
2
VERSION
@ -1,6 +1,7 @@
|
||||
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
|
||||
# This is the VERSION file for Open MPI, describing the precise
|
||||
# version of Open MPI in this distribution. The various components of
|
||||
@ -94,6 +95,7 @@ libmpi_mpifh_so_version=0:0:0
|
||||
libmpi_usempi_tkr_so_version=0:0:0
|
||||
libmpi_usempi_ignore_tkr_so_version=0:0:0
|
||||
libopen_rte_so_version=0:0:0
|
||||
libopen_rca_so_version=0:0:0
|
||||
libopen_pal_so_version=0:0:0
|
||||
libmpi_java_so_version=0:0:0
|
||||
|
||||
|
29
autogen.pl
29
autogen.pl
@ -2,7 +2,7 @@
|
||||
#
|
||||
# Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
#
|
||||
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -41,6 +41,7 @@ my @subdirs;
|
||||
# Command line parameters
|
||||
my $no_ompi_arg = 0;
|
||||
my $no_orte_arg = 0;
|
||||
my $no_orca_arg = 0;
|
||||
my $quiet_arg = 0;
|
||||
my $debug_arg = 0;
|
||||
my $help_arg = 0;
|
||||
@ -925,6 +926,7 @@ sub patch_autotools_output {
|
||||
|
||||
my $ok = Getopt::Long::GetOptions("no-ompi" => \$no_ompi_arg,
|
||||
"no-orte" => \$no_orte_arg,
|
||||
"no-orca" => \$no_orca_arg,
|
||||
"quiet|q" => \$quiet_arg,
|
||||
"debug|d" => \$debug_arg,
|
||||
"help|h" => \$help_arg,
|
||||
@ -939,6 +941,7 @@ if (!$ok || $help_arg) {
|
||||
print "Options:
|
||||
--no-ompi | -no-ompi Do not build the Open MPI layer
|
||||
--no-orte | -no-orte Do not build the ORTE layer
|
||||
--no-orca | -no-orca Do not build the ORCA and Open MPI layers
|
||||
--quiet | -q Do not display normal verbose output
|
||||
--debug | -d Output lots of debug information
|
||||
--help | -h This help list
|
||||
@ -967,12 +970,26 @@ if (! -e "orte") {
|
||||
$no_orte_arg = 1;
|
||||
debug "No orte subdirectory found - will not build ORTE\n";
|
||||
}
|
||||
if (! -e "orca") {
|
||||
$no_orca_arg = 1;
|
||||
$no_ompi_arg = 1;
|
||||
debug "No orca subdirectory found - will not build Pinapple and MPI layers\n";
|
||||
}
|
||||
|
||||
# --no-orca implies --no-ompi
|
||||
if ($no_orca_arg) {
|
||||
$no_ompi_arg = 1;
|
||||
}
|
||||
|
||||
if ($no_ompi_arg) {
|
||||
$project_name_long = "Open MPI Runtime Collaborative Abstraction";
|
||||
$project_name_short = "open-rca";
|
||||
}
|
||||
if ($no_ompi_arg && $no_orca_arg) {
|
||||
$project_name_long = "Open MPI Run Time Environment";
|
||||
$project_name_short = "open-rte";
|
||||
}
|
||||
if ($no_orte_arg) {
|
||||
}
|
||||
if ($no_ompi_arg && $no_orca_arg && $no_orte_arg ) {
|
||||
$project_name_long = "Open Portability Access Layer";
|
||||
$project_name_short = "open-pal";
|
||||
}
|
||||
@ -1126,9 +1143,11 @@ if (! (-f "VERSION" && -f "configure.ac" && -f $topdir_file)) {
|
||||
my $projects;
|
||||
push(@{$projects}, { name => "opal", dir => "opal", need_base => 1 });
|
||||
push(@{$projects}, { name => "orte", dir => "orte", need_base => 1 })
|
||||
if (!$no_ompi_arg || !$no_orte_arg);
|
||||
if (!$no_orte_arg);
|
||||
push(@{$projects}, { name => "orca", dir => "orca", need_base => 1 })
|
||||
if (!$no_orca_arg);
|
||||
push(@{$projects}, { name => "ompi", dir => "ompi", need_base => 1 })
|
||||
if (!$no_ompi_arg);
|
||||
if (!$no_ompi_arg && !$no_orca_arg);
|
||||
|
||||
# Save the list of projects in the m4 file
|
||||
my $str;
|
||||
|
@ -11,6 +11,7 @@
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -91,6 +92,9 @@ set dirs=opal
|
||||
if (-d orte) then
|
||||
set dirs="$dirs orte"
|
||||
endif
|
||||
if (-d orca) then
|
||||
set dirs="$dirs orca"
|
||||
endif
|
||||
if (-d ompi) then
|
||||
set dirs="$dirs ompi"
|
||||
endif
|
||||
@ -169,7 +173,7 @@ cd ..
|
||||
echo "*** Now in: `pwd`"
|
||||
echo "*** Replacing config.sub/config.guess with latest from ftp.gnu.org..."
|
||||
foreach file (config.guess config.sub)
|
||||
foreach dir (opal orte ompi)
|
||||
foreach dir (opal orte orca ompi)
|
||||
if (-d $dir) then
|
||||
find $dir -name $file \
|
||||
-exec chmod +w {} \; \
|
||||
|
24
configure.ac
24
configure.ac
@ -14,7 +14,7 @@
|
||||
# Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Copyright (c) 2006-2011 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
# Copyright (c) 2009-2012 Oak Ridge National Labs. All rights reserved.
|
||||
# Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
@ -98,6 +98,7 @@ AC_SUBST([CONFIGURE_DEPENDENCIES], ['$(top_srcdir)/VERSION'])
|
||||
# Set up project specific AM_CONDITIONALs
|
||||
AM_CONDITIONAL([PROJECT_OMPI], m4_ifdef([project_ompi], [true], [false]))
|
||||
AM_CONDITIONAL([PROJECT_ORTE], m4_ifdef([project_orte], [true], [false]))
|
||||
AM_CONDITIONAL([PROJECT_ORCA], m4_ifdef([project_orca], [true], [false]))
|
||||
|
||||
ompi_show_subtitle "Checking versions"
|
||||
|
||||
@ -112,6 +113,11 @@ m4_ifdef([project_orte],
|
||||
[$srcdir/VERSION],
|
||||
[orte/include/orte/version.h])])
|
||||
|
||||
m4_ifdef([project_orca],
|
||||
[OPAL_SAVE_VERSION([ORCA], [Open MPI Runtime Collaborative Abstraction],
|
||||
[$srcdir/VERSION],
|
||||
[orca/include/orca/version.h])])
|
||||
|
||||
OPAL_SAVE_VERSION([OPAL], [Open Portable Access Layer], [$srcdir/VERSION],
|
||||
[opal/include/opal/version.h])
|
||||
|
||||
@ -136,6 +142,8 @@ m4_ifdef([project_ompi],
|
||||
AC_SUBST(libmca_common_portals_so_version)])
|
||||
m4_ifdef([project_orte],
|
||||
[AC_SUBST(libopen_rte_so_version)])
|
||||
m4_ifdef([project_orca],
|
||||
[AC_SUBST(libopen_rca_so_version)])
|
||||
AC_SUBST(libmca_opal_common_hwloc_so_version)
|
||||
AC_SUBST(libopen_pal_so_version)
|
||||
|
||||
@ -163,6 +171,8 @@ AC_DEFINE_UNQUOTED([OMPI_ENABLE_PROGRESS_THREADS], [$OMPI_ENABLE_PROGRESS_THREAD
|
||||
# List header files to generate
|
||||
|
||||
AM_CONFIG_HEADER([opal/include/opal_config.h])
|
||||
m4_ifdef([project_orca],
|
||||
[AM_CONFIG_HEADER([orca/include/orca_config.h])])
|
||||
m4_ifdef([project_orte],
|
||||
[AM_CONFIG_HEADER([orte/include/orte_config.h])])
|
||||
m4_ifdef([project_ompi],
|
||||
@ -245,6 +255,7 @@ AC_SUBST(top_ompi_builddir)
|
||||
|
||||
OPAL_CONFIGURE_OPTIONS
|
||||
m4_ifdef([project_orte], [ORTE_CONFIGURE_OPTIONS])
|
||||
m4_ifdef([project_orca], [ORCA_CONFIGURE_OPTIONS])
|
||||
m4_ifdef([project_ompi], [OMPI_CONFIGURE_OPTIONS])
|
||||
|
||||
if test "$enable_binaries" = "no" -a "$enable_dist" = "yes"; then
|
||||
@ -566,7 +577,7 @@ m4_ifdef([project_orte], [ORTE_SETUP_JAVA])
|
||||
# Java MPI Binding request
|
||||
##################################
|
||||
# Only needed for OMPI
|
||||
m4_ifdef([project_ompi], [OMPI_SETUP_JAVA_BINDINGS])
|
||||
m4_ifdef([project_ompi], [m4_ifdef([project_orte], [OMPI_SETUP_JAVA_BINDINGS], [OMPI_SETUP_NO_JAVA_BINDINGS])])
|
||||
|
||||
##################################
|
||||
# Hadoop support
|
||||
@ -1123,9 +1134,9 @@ if test "$OMPI_TOP_BUILDDIR" != "$OMPI_TOP_SRCDIR"; then
|
||||
# rather than have successive assignments to these shell
|
||||
# variables, lest the $(foo) names try to get evaluated here.
|
||||
# Yuck!
|
||||
CPPFLAGS='-I$(top_srcdir) -I$(top_builddir) -I$(top_srcdir)/opal/include m4_ifdef([project_orte], [-I$(top_srcdir)/orte/include]) m4_ifdef([project_ompi], [-I$(top_srcdir)/ompi/include])'" $CPPFLAGS"
|
||||
CPPFLAGS='-I$(top_srcdir) -I$(top_builddir) -I$(top_srcdir)/opal/include m4_ifdef([project_orca], [-I$(top_srcdir)/orca/include]) m4_ifdef([project_orte], [-I$(top_srcdir)/orte/include]) m4_ifdef([project_ompi], [-I$(top_srcdir)/ompi/include])'" $CPPFLAGS"
|
||||
# C++ is only relevant if we're building OMPI
|
||||
m4_ifdef([project_ompi], [CXXCPPFLAGS='-I$(top_srcdir) -I$(top_builddir) -I$(top_srcdir)/opal/include -I$(top_srcdir)/orte/include -I$(top_srcdir)/ompi/include'" $CXXCPPFLAGS"])
|
||||
m4_ifdef([project_ompi], [CXXCPPFLAGS='-I$(top_srcdir) -I$(top_builddir) -I$(top_srcdir)/opal/include -I$(top_srcdir)/orca/include -I$(top_srcdir)/orte/include -I$(top_srcdir)/ompi/include'" $CXXCPPFLAGS"])
|
||||
else
|
||||
CPPFLAGS='-I$(top_srcdir)'" $CPPFLAGS"
|
||||
# C++ is only relevant if we're building OMPI
|
||||
@ -1136,7 +1147,7 @@ fi
|
||||
# versions without optimization for debugger modules).
|
||||
|
||||
m4_ifdef([project_orte], [ORTE_SETUP_DEBUGGER_FLAGS],
|
||||
[m4_ifdef([project_ompi], [ORTE_SETUP_DEBUGGER_FLAGS])])
|
||||
[m4_ifdef([project_ompi], [m4_ifdef([project_orte], [ORTE_SETUP_DEBUGGER_FLAGS])] )] )
|
||||
|
||||
#
|
||||
# Delayed the substitution of CFLAGS and CXXFLAGS until now because
|
||||
@ -1169,6 +1180,7 @@ AC_PATH_PROG(PERL, perl, perl)
|
||||
|
||||
OPAL_SETUP_WRAPPER_FINAL
|
||||
m4_ifdef([project_orte], [ORTE_SETUP_WRAPPER_FINAL])
|
||||
m4_ifdef([project_orca], [ORCA_SETUP_WRAPPER_FINAL])
|
||||
m4_ifdef([project_ompi], [OMPI_SETUP_WRAPPER_FINAL])
|
||||
|
||||
# Recreate some defines prefixed with OMPI_ so that there are no bare
|
||||
@ -1215,6 +1227,7 @@ fi
|
||||
|
||||
m4_ifdef([opal_CONFIG_LOCAL], [opal_CONFIG_LOCAL])
|
||||
m4_ifdef([orte_CONFIG_LOCAL], [orte_CONFIG_LOCAL])
|
||||
m4_ifdef([orca_CONFIG_LOCAL], [orca_CONFIG_LOCAL])
|
||||
m4_ifdef([ompi_CONFIG_LOCAL], [ompi_CONFIG_LOCAL])
|
||||
|
||||
############################################################################
|
||||
@ -1242,6 +1255,7 @@ AC_CONFIG_FILES([
|
||||
|
||||
OPAL_CONFIG_FILES
|
||||
m4_ifdef([project_orte], [ORTE_CONFIG_FILES])
|
||||
m4_ifdef([project_orca], [ORCA_CONFIG_FILES])
|
||||
m4_ifdef([project_ompi], [OMPI_CONFIG_FILES])
|
||||
|
||||
AC_OUTPUT
|
||||
|
@ -12,6 +12,7 @@
|
||||
# Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Copyright (c) 2010-2011 Sandia National Laboratories. All rights reserved.
|
||||
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -138,7 +139,7 @@ libmpi_la_LIBADD = \
|
||||
$(MCA_ompi_FRAMEWORK_LIBS) \
|
||||
$(OMPI_MPIEXT_C_LIBS) \
|
||||
$(OMPI_LIBMPI_EXTRA_LIBS) \
|
||||
$(top_ompi_builddir)/orte/libopen-rte-lt.la
|
||||
$(top_ompi_builddir)/orca/libopen-rca-lt.la
|
||||
libmpi_la_DEPENDENCIES = $(libmpi_la_LIBADD)
|
||||
libmpi_la_LDFLAGS = \
|
||||
-version-info $(libmpi_so_version) \
|
||||
|
@ -11,6 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -92,7 +93,8 @@
|
||||
#include "ompi/errhandler/errcode.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
/*
|
||||
* Private functions
|
||||
@ -174,7 +176,7 @@ int ompi_attr_create_predefined(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = set_f(MPI_APPNUM, orte_process_info.app_num);
|
||||
ret = set_f(MPI_APPNUM, orca_process_info_get_app_num() );
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -28,8 +28,8 @@
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "opal/threads/mutex.h"
|
||||
@ -1252,7 +1252,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
|
||||
int rc;
|
||||
int local_rank, local_size;
|
||||
ompi_proc_t **rprocs=NULL;
|
||||
orte_std_cntr_t size_len;
|
||||
orca_std_cntr_t size_len;
|
||||
int int_len, rlen;
|
||||
opal_buffer_t *sbuf=NULL, *rbuf=NULL;
|
||||
void *sendbuf;
|
||||
@ -1356,7 +1356,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
|
||||
goto err_exit;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.load(rbuf, recvbuf, rlen))) {
|
||||
if (OMPI_SUCCESS != (rc = opal_dss.load(rbuf, recvbuf, rlen))) {
|
||||
goto err_exit;
|
||||
}
|
||||
|
||||
@ -1426,7 +1426,6 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high )
|
||||
int scount=0;
|
||||
int rc;
|
||||
ompi_proc_t *ourproc, *theirproc;
|
||||
orte_ns_cmp_bitmask_t mask;
|
||||
|
||||
rank = ompi_comm_rank (intercomm);
|
||||
rsize= ompi_comm_remote_size (intercomm);
|
||||
@ -1468,8 +1467,8 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high )
|
||||
ourproc = ompi_group_peer_lookup(intercomm->c_local_group,0);
|
||||
theirproc = ompi_group_peer_lookup(intercomm->c_remote_group,0);
|
||||
|
||||
mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID;
|
||||
rc = orte_util_compare_name_fields(mask, &(ourproc->proc_name), &(theirproc->proc_name));
|
||||
rc = orca_process_name_compare((ORCA_NAME_CMP_JOBID | ORCA_NAME_CMP_VPID),
|
||||
&(ourproc->proc_name), &(theirproc->proc_name));
|
||||
if ( 0 > rc ) {
|
||||
flag = true;
|
||||
}
|
||||
|
@ -27,7 +27,6 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "orte/types.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/op/op.h"
|
||||
@ -41,7 +40,7 @@
|
||||
#include "ompi/runtime/mpiruntime.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
@ -780,11 +779,11 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf,
|
||||
int i;
|
||||
int rc;
|
||||
int local_leader, local_rank;
|
||||
orte_process_name_t *remote_leader=NULL;
|
||||
orte_std_cntr_t size_count;
|
||||
orca_process_name_t *remote_leader=NULL;
|
||||
orca_std_cntr_t size_count;
|
||||
|
||||
local_leader = (*((int*)lleader));
|
||||
remote_leader = (orte_process_name_t*)rleader;
|
||||
remote_leader = (orca_process_name_t*)rleader;
|
||||
size_count = count;
|
||||
|
||||
if ( &ompi_mpi_op_sum.op != op && &ompi_mpi_op_prod.op != op &&
|
||||
@ -814,23 +813,23 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf,
|
||||
sbuf = OBJ_NEW(opal_buffer_t);
|
||||
rbuf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(sbuf, tmpbuf, (orte_std_cntr_t)count, OPAL_INT))) {
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(sbuf, tmpbuf, (orca_std_cntr_t)count, OPAL_INT))) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if ( send_first ) {
|
||||
if (0 > (rc = orte_rml.send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0))) {
|
||||
if (0 > (rc = orca_oob_send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0))) {
|
||||
goto exit;
|
||||
}
|
||||
if (0 > (rc = orte_rml.recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0))) {
|
||||
if (0 > (rc = orca_oob_recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0))) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (0 > (rc = orte_rml.recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0))) {
|
||||
if (0 > (rc = orca_oob_recv_buffer(remote_leader, rbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0))) {
|
||||
goto exit;
|
||||
}
|
||||
if (0 > (rc = orte_rml.send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0))) {
|
||||
if (0 > (rc = orca_oob_send_buffer(remote_leader, sbuf, OMPI_RML_TAG_COMM_CID_INTRA, 0))) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
dnl Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||
dnl reserved.
|
||||
dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
dnl Copyright (c) 2009-2012 Oak Ridge National Labs. All rights reserved.
|
||||
dnl
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
@ -252,5 +252,30 @@ fi
|
||||
AC_MSG_RESULT([$OMPI_FORTRAN_MAX_ARRAY_RANK])
|
||||
AC_SUBST(OMPI_FORTRAN_MAX_ARRAY_RANK)
|
||||
|
||||
|
||||
#
|
||||
# A global check for ORTE to make it easier to support the tools
|
||||
# See note in orca_configure_options.m4
|
||||
#
|
||||
AC_MSG_CHECKING([if want ORTE supported OMPI tools])
|
||||
if test "$ORCA_WITH_ORTE_SUPPORT" = "0"; then
|
||||
list_of_frameworks="pubsub-pmi,dpm-orte,pubsub-orte"
|
||||
if test -z $enable_mca_no_build ; then
|
||||
enable_mca_no_build="$list_of_frameworks"
|
||||
else
|
||||
enable_mca_no_build="$enable_mca_no_build,$list_of_frameworks"
|
||||
fi
|
||||
OMPI_WITH_ORTE_SUPPORTED_TOOLS=0
|
||||
AC_MSG_RESULT([no])
|
||||
else
|
||||
OMPI_WITH_ORTE_SUPPORTED_TOOLS=1
|
||||
AC_MSG_RESULT([yes])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([OMPI_WITH_ORTE_SUPPORTED_TOOLS],
|
||||
[$OMPI_WITH_ORTE_SUPPORTED_TOOLS],
|
||||
[Whether we want ORTE supported OMPI tools])
|
||||
AM_CONDITIONAL(OMPI_WITH_ORTE_SUPPORTED_TOOLS, test "$OMPI_WITH_ORTE_SUPPORTED_TOOLS" = "1")
|
||||
|
||||
])dnl
|
||||
|
||||
|
@ -14,6 +14,7 @@ dnl Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights
|
||||
dnl reserved.
|
||||
dnl Copyright (c) 2007-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
dnl Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
|
||||
dnl Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
@ -26,11 +27,29 @@ AC_DEFUN([OMPI_SETUP_JAVA_BINDINGS_BANNER],[
|
||||
ompi_show_subtitle "Java MPI bindings"
|
||||
])
|
||||
|
||||
#
|
||||
# OMPI_SETUP_NO_JAVA_BINDINGS()
|
||||
# -----------------------------
|
||||
# Force configure to not build bindings.
|
||||
# For the case: ./autogen.pl --no-orte
|
||||
#
|
||||
AC_DEFUN([OMPI_SETUP_NO_JAVA_BINDINGS],[
|
||||
AC_MSG_CHECKING([if want Java bindings])
|
||||
AC_MSG_RESULT([no (Needs ORTE support)])
|
||||
|
||||
WANT_MPI_JAVA_SUPPORT=0
|
||||
|
||||
AC_DEFINE_UNQUOTED([OMPI_WANT_JAVA_BINDINGS], [$WANT_MPI_JAVA_SUPPORT],
|
||||
[do we want java mpi bindings])
|
||||
AM_CONDITIONAL(OMPI_WANT_JAVA_BINDINGS, test "$WANT_MPI_JAVA_SUPPORT" = "1")
|
||||
])
|
||||
|
||||
# OMPI_SETUP_JAVA_BINDINGS()
|
||||
# ----------------
|
||||
# Do everything required to setup the Java MPI bindings. Safe to AC_REQUIRE
|
||||
# this macro.
|
||||
AC_DEFUN([OMPI_SETUP_JAVA_BINDINGS],[
|
||||
|
||||
# must have Java setup
|
||||
AC_REQUIRE([ORTE_SETUP_JAVA])
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -72,9 +73,7 @@
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/include/mpi.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#if defined(OMPI_MSGQ_DLL)
|
||||
/* This variable is old/deprecated -- the mpimsgq_dll_locations[]
|
||||
@ -170,17 +169,17 @@ void ompi_wait_for_debugger(void)
|
||||
{
|
||||
int i, debugger;
|
||||
char *a, *b, **dirs, **tmp1 = NULL, **tmp2 = NULL;
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
#if ORCA_WITH_FULL_ORTE_SUPPORT
|
||||
opal_buffer_t buf;
|
||||
int rc;
|
||||
#endif
|
||||
|
||||
/* See lengthy comment in orte/tools/orterun/debuggers.c about
|
||||
orte_in_parallel_debugger */
|
||||
#if ORTE_DISABLE_FULL_SUPPORT
|
||||
#if !ORCA_WITH_FULL_ORTE_SUPPORT
|
||||
debugger = 0;
|
||||
#else
|
||||
debugger = orte_in_parallel_debugger;
|
||||
debugger = orca_info_in_parallel_debugger();
|
||||
#endif
|
||||
|
||||
/* Add in environment variables for other launchers, such as yod,
|
||||
@ -225,8 +224,8 @@ void ompi_wait_for_debugger(void)
|
||||
mpimsgq_dll_locations = tmp1;
|
||||
mpidbg_dll_locations = tmp2;
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
if (orte_standalone_operation) {
|
||||
#if ORCA_WITH_FULL_ORTE_SUPPORT
|
||||
if (orca_info_standalone_operation()) {
|
||||
#endif
|
||||
/* spin until debugger attaches and releases us */
|
||||
while (MPIR_debug_gate == 0) {
|
||||
@ -238,7 +237,7 @@ void ompi_wait_for_debugger(void)
|
||||
sleep(1); /* seconds */
|
||||
#endif
|
||||
}
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
#if ORCA_WITH_FULL_ORTE_SUPPORT
|
||||
} else {
|
||||
|
||||
/* only the rank=0 proc waits for either a message from the
|
||||
@ -246,21 +245,21 @@ void ompi_wait_for_debugger(void)
|
||||
* spin in the grpcomm barrier in ompi_mpi_init until rank=0
|
||||
* joins them.
|
||||
*/
|
||||
if (0 != ORTE_PROC_MY_NAME->vpid) {
|
||||
if (0 != orca_process_info_get_vpid(ORCA_PROC_MY_NAME) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* VPID 0 waits for a message from the HNP */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf,
|
||||
ORTE_RML_TAG_DEBUGGER_RELEASE, 0);
|
||||
rc = orca_oob_recv_buffer(ORCA_NAME_WILDCARD, &buf,
|
||||
ORCA_OOB_TAG_DEBUGGER_RELEASE, 0);
|
||||
OBJ_DESTRUCT(&buf); /* don't care about contents of message */
|
||||
if (rc < 0) {
|
||||
/* if it failed for some reason, then we are in trouble -
|
||||
* for now, just report the problem and give up waiting
|
||||
*/
|
||||
opal_output(0, "Debugger_attach[rank=%ld]: could not wait for debugger!",
|
||||
(long)ORTE_PROC_MY_NAME->vpid);
|
||||
(long)orca_process_info_get_vpid(ORCA_PROC_MY_NAME) );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -12,6 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -35,7 +36,7 @@
|
||||
#include "ompi/errhandler/errhandler_predefined.h"
|
||||
#include "ompi/errhandler/errcode-internal.h"
|
||||
|
||||
#include "orte/types.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2006 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -30,9 +30,8 @@
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/errhandler/errhandler_predefined.h"
|
||||
#include "ompi/errhandler/errcode.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
@ -163,7 +162,7 @@ static void out(char *str, char *arg)
|
||||
}
|
||||
|
||||
/*
|
||||
* Use orte_show_help() to aggregate the error messages (i.e., show it
|
||||
* Use orca_show_help() to aggregate the error messages (i.e., show it
|
||||
* once rather than N times).
|
||||
*
|
||||
* Note that this function will only be invoked for errors during the
|
||||
@ -182,8 +181,9 @@ static void backend_fatal_aggregate(char *type,
|
||||
arg = va_arg(arglist, char*);
|
||||
va_end(arglist);
|
||||
|
||||
asprintf(&prefix, "[%s:%d]", orte_process_info.nodename,
|
||||
(int) orte_process_info.pid);
|
||||
asprintf(&prefix, "[%s:%d]",
|
||||
orca_process_info_get_nodename(),
|
||||
(int) orca_process_info_get_pid() );
|
||||
|
||||
if (NULL != error_code) {
|
||||
err_msg = ompi_mpi_errnum_get_string(*error_code);
|
||||
@ -195,18 +195,22 @@ static void backend_fatal_aggregate(char *type,
|
||||
}
|
||||
|
||||
if (NULL != name && ompi_mpi_initialized && !ompi_mpi_finalized) {
|
||||
orte_show_help("help-mpi-errors.txt",
|
||||
orca_show_help("help-mpi-errors.txt",
|
||||
"mpi_errors_are_fatal", false,
|
||||
prefix, (NULL == arg) ? "" : "in",
|
||||
(NULL == arg) ? "" : arg,
|
||||
prefix, ORTE_PROC_MY_NAME->jobid, ORTE_PROC_MY_NAME->vpid,
|
||||
prefix,
|
||||
orca_process_info_get_jobid(ORCA_PROC_MY_NAME),
|
||||
orca_process_info_get_vpid(ORCA_PROC_MY_NAME),
|
||||
prefix, type, name, prefix, err_msg, prefix, type, prefix);
|
||||
} else if (NULL == name) {
|
||||
orte_show_help("help-mpi-errors.txt",
|
||||
orca_show_help("help-mpi-errors.txt",
|
||||
"mpi_errors_are_fatal unknown handle", false,
|
||||
prefix, (NULL == arg) ? "" : "in",
|
||||
(NULL == arg) ? "" : arg,
|
||||
prefix, ORTE_PROC_MY_NAME->jobid, ORTE_PROC_MY_NAME->vpid,
|
||||
prefix,
|
||||
orca_process_info_get_jobid(ORCA_PROC_MY_NAME),
|
||||
orca_process_info_get_vpid(ORCA_PROC_MY_NAME),
|
||||
prefix, type, prefix, err_msg, prefix, type, prefix);
|
||||
}
|
||||
|
||||
@ -325,7 +329,7 @@ static void backend_fatal(char *type, struct ompi_communicator_t *comm,
|
||||
meaning that there is a better chance that the error message
|
||||
will actually get printed). Note that we can only do
|
||||
aggregation after MPI_INIT and before MPI_FINALIZE. */
|
||||
if (orte_help_want_aggregate && orte_show_help_is_available()) {
|
||||
if (orca_show_help_want_aggregate() && orca_show_help_is_available()) {
|
||||
backend_fatal_aggregate(type, comm, name, error_code, arglist);
|
||||
} else {
|
||||
backend_fatal_no_aggregate(type, comm, name, error_code, arglist);
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -19,9 +20,9 @@
|
||||
#ifndef OMPI_CONSTANTS_H
|
||||
#define OMPI_CONSTANTS_H
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orca/constants.h"
|
||||
|
||||
#define OMPI_ERR_BASE ORTE_ERR_MAX
|
||||
#define OMPI_ERR_BASE ORCA_ERR_MAX
|
||||
|
||||
/* error codes */
|
||||
enum {
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -28,14 +29,16 @@
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/bml/base/bml_base_btl.h"
|
||||
#include "bml_r2.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
extern mca_bml_base_component_t mca_bml_r2_component;
|
||||
@ -405,13 +408,13 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
|
||||
if (mca_bml_r2.show_unreach_errors &&
|
||||
OMPI_ERR_UNREACH == ret) {
|
||||
orte_show_help("help-mca-bml-r2.txt",
|
||||
orca_show_help("help-mca-bml-r2.txt",
|
||||
"unreachable proc",
|
||||
true,
|
||||
ORTE_NAME_PRINT(&(ompi_proc_local_proc->proc_name)),
|
||||
ORCA_NAME_PRINT(&(ompi_proc_local_proc->proc_name)),
|
||||
(ompi_proc_local_proc->proc_hostname ?
|
||||
ompi_proc_local_proc->proc_hostname : "unknown!"),
|
||||
ORTE_NAME_PRINT(&(unreach_proc->proc_name)),
|
||||
ORCA_NAME_PRINT(&(unreach_proc->proc_name)),
|
||||
(unreach_proc->proc_hostname ?
|
||||
unreach_proc->proc_hostname : "unknown!"),
|
||||
btl_names);
|
||||
|
@ -12,6 +12,7 @@
|
||||
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,8 +28,7 @@
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/runtime/ompi_cr.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
@ -42,7 +42,7 @@
|
||||
|
||||
int mca_bml_r2_ft_event(int state)
|
||||
{
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
#if ORCA_WITH_FULL_ORTE_SUPPORT
|
||||
static bool first_continue_pass = false;
|
||||
ompi_proc_t** procs = NULL;
|
||||
size_t num_procs;
|
||||
@ -51,7 +51,6 @@ int mca_bml_r2_ft_event(int state)
|
||||
int loc_state;
|
||||
int param_type = -1;
|
||||
char *param_list = NULL;
|
||||
orte_grpcomm_collective_t coll;
|
||||
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
/* Do nothing for now */
|
||||
@ -60,7 +59,7 @@ int mca_bml_r2_ft_event(int state)
|
||||
first_continue_pass = !first_continue_pass;
|
||||
|
||||
/* Since nothing in Checkpoint, we are fine here (unless required by BTL) */
|
||||
if( orte_cr_continue_like_restart && !first_continue_pass) {
|
||||
if( orca_info_cr_continue_like_restart() && !first_continue_pass) {
|
||||
procs = ompi_proc_all(&num_procs);
|
||||
if(NULL == procs) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
@ -142,7 +141,7 @@ int mca_bml_r2_ft_event(int state)
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
/* Matches OPAL_CRS_RESTART_PRE */
|
||||
if( orte_cr_continue_like_restart && first_continue_pass) {
|
||||
if( orca_info_cr_continue_like_restart() && first_continue_pass) {
|
||||
if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) {
|
||||
opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n");
|
||||
return ret;
|
||||
@ -153,20 +152,15 @@ int mca_bml_r2_ft_event(int state)
|
||||
}
|
||||
}
|
||||
/* Matches OPAL_CRS_RESTART */
|
||||
else if( orte_cr_continue_like_restart && !first_continue_pass ) {
|
||||
else if( orca_info_cr_continue_like_restart() && !first_continue_pass ) {
|
||||
/*
|
||||
* Barrier to make sure all processes have been successfully restarted before
|
||||
* we try to remove some restart only files.
|
||||
*/
|
||||
OBJ_CONSTRUCT(&coll, orte_grpcomm_collective_t);
|
||||
coll.id = orte_process_info.peer_init_barrier;
|
||||
if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier(&coll))) {
|
||||
opal_output(0, "bml:r2: ft_event(Restart): Failed in orte_grpcomm.barrier (%d)", ret);
|
||||
if (ORCA_SUCCESS != (ret = orca_coll_barrier(ORCA_COLL_TYPE_BARRIER_INIT) ) ) {
|
||||
opal_output(0, "bml:r2: ft_event(Restart): Failed in orca_coll_barrier (%d)", ret);
|
||||
return ret;
|
||||
}
|
||||
while (coll.active) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-open the BTL framework to get the full list of components.
|
||||
@ -236,15 +230,10 @@ int mca_bml_r2_ft_event(int state)
|
||||
* Barrier to make all processes have been successfully restarted before
|
||||
* we try to remove some restart only files.
|
||||
*/
|
||||
OBJ_CONSTRUCT(&coll, orte_grpcomm_collective_t);
|
||||
coll.id = orte_process_info.peer_init_barrier;
|
||||
if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier(&coll))) {
|
||||
opal_output(0, "bml:r2: ft_event(Restart): Failed in orte_grpcomm.barrier (%d)", ret);
|
||||
if (ORCA_SUCCESS != (ret = orca_coll_barrier(ORCA_COLL_TYPE_BARRIER_INIT) ) ) {
|
||||
opal_output(0, "bml:r2: ft_event(Restart): Failed in orca_coll_barrier (%d)", ret);
|
||||
return ret;
|
||||
}
|
||||
while (coll.active) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-open the BTL framework to get the full list of components.
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,11 +26,7 @@
|
||||
#include "base.h"
|
||||
#include "btl_base_error.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/types.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
int mca_btl_base_verbose = -1;
|
||||
|
||||
@ -63,11 +60,11 @@ void mca_btl_base_error_no_nics(const char* transport,
|
||||
char *procid;
|
||||
if (mca_btl_base_warn_component_unused) {
|
||||
/* print out no-nic warning if user told us to */
|
||||
asprintf(&procid, "%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
asprintf(&procid, "%s", ORCA_NAME_PRINT(ORCA_PROC_MY_NAME));
|
||||
|
||||
orte_show_help("help-mpi-btl-base.txt", "btl:no-nics",
|
||||
true, procid, transport, orte_process_info.nodename,
|
||||
nic_name);
|
||||
orca_show_help("help-mpi-btl-base.txt", "btl:no-nics",
|
||||
true, procid, transport, orca_process_info_get_nodename(),
|
||||
nic_name);
|
||||
free(procid);
|
||||
}
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -26,9 +27,7 @@
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
OMPI_DECLSPEC extern int mca_btl_base_verbose;
|
||||
|
||||
@ -38,8 +37,8 @@ OMPI_DECLSPEC extern int mca_btl_base_out(const char*, ...) __opal_attribute_for
|
||||
#define BTL_OUTPUT(args) \
|
||||
do { \
|
||||
mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
|
||||
orte_process_info.nodename, \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
orca_process_info_get_nodename(), \
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_out args; \
|
||||
mca_btl_base_out("\n"); \
|
||||
@ -49,8 +48,8 @@ do { \
|
||||
#define BTL_ERROR(args) \
|
||||
do { \
|
||||
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
|
||||
orte_process_info.nodename, \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
orca_process_info_get_nodename(), \
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
@ -59,9 +58,9 @@ do { \
|
||||
#define BTL_PEER_ERROR(proc, args) \
|
||||
do { \
|
||||
mca_btl_base_err("%s[%s:%d:%s] from %s ", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__, \
|
||||
orte_process_info.nodename); \
|
||||
orca_process_info_get_nodename()); \
|
||||
if(proc && proc->proc_hostname) { \
|
||||
mca_btl_base_err("to: %s ", proc->proc_hostname); \
|
||||
} \
|
||||
@ -75,8 +74,8 @@ do { \
|
||||
do { \
|
||||
if(mca_btl_base_verbose > 0) { \
|
||||
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
|
||||
orte_process_info.nodename, \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
orca_process_info_get_nodename(), \
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -21,13 +22,13 @@
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
|
||||
OBJ_CLASS_INSTANCE( mca_btl_base_selected_module_t,
|
||||
@ -152,9 +153,9 @@ int mca_btl_base_select(bool enable_progress_threads,
|
||||
/* Finished querying all components. Check for the bozo case. */
|
||||
|
||||
if (0 == opal_list_get_size(&mca_btl_base_modules_initialized)) {
|
||||
orte_show_help("help-mca-base.txt", "find-available:none-found", true,
|
||||
orca_show_help("help-mca-base.txt", "find-available:none-found", true,
|
||||
"btl");
|
||||
orte_errmgr.abort(1, NULL);
|
||||
orca_error_mgr_abort(1, NULL);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,7 +26,9 @@
|
||||
#include "elan/elan.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
@ -68,7 +71,7 @@ static int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl,
|
||||
FILE* file;
|
||||
ELAN_BASE* base;
|
||||
|
||||
filename = opal_os_path( false, orte_process_info.proc_session_dir, "ELAN_ID", NULL );
|
||||
filename = opal_os_path( false, orca_process_info_get_process_session_dir(), "ELAN_ID", NULL );
|
||||
file = fopen( filename, "w" );
|
||||
fprintf( file, "%s %d\n", ompi_proc_local_proc->proc_hostname, elan_btl->elan_position );
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -643,7 +644,7 @@ int mca_btl_mx_ft_event(int state) {
|
||||
* kernel: blcr: thaw_threads returned error, aborting. -1
|
||||
* JJH: It may be possible to, instead of restarting the entire driver, just reconnect endpoints
|
||||
*/
|
||||
orte_cr_continue_like_restart = true;
|
||||
orca_info_cr_continue_like_restart() = true;
|
||||
|
||||
for( i = 0; i < mca_btl_mx_component.mx_num_btls; i++ ) {
|
||||
mx_btl = mca_btl_mx_component.mx_btls[i];
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -18,7 +19,8 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
|
||||
#include "btl_mx.h"
|
||||
@ -121,7 +123,7 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
ompi_proc, (void*)&mx_peers, &size );
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
opal_output( 0, "mca_pml_base_modex_recv failed for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name) );
|
||||
ORCA_NAME_PRINT(&ompi_proc->proc_name) );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -130,7 +132,7 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
}
|
||||
if( (size % sizeof(mca_btl_mx_addr_t)) != 0 ) {
|
||||
opal_output( 0, "invalid mx address for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name) );
|
||||
ORCA_NAME_PRINT(&ompi_proc->proc_name) );
|
||||
return NULL;
|
||||
}
|
||||
/* Let's see if we have a way to connect to the remote proc using MX.
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -32,18 +33,18 @@
|
||||
#include "opal_stdint.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "opal/prefetch.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "opal/mca/timer/base/base.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
#include "ompi/runtime/mpiruntime.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "btl_ofud.h"
|
||||
#include "btl_ofud_frag.h"
|
||||
#include "btl_ofud_endpoint.h"
|
||||
@ -439,7 +440,7 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
|
||||
*num_btl_modules = 0;
|
||||
num_devs = 0;
|
||||
|
||||
seedv[0] = ORTE_PROC_MY_NAME->vpid;
|
||||
seedv[0] = orca_process_info_get_vpid(ORCA_PROC_MY_NAME);
|
||||
seedv[1] = opal_timer_base_get_cycles();
|
||||
seedv[2] = opal_timer_base_get_cycles();
|
||||
seed48(seedv);
|
||||
@ -451,7 +452,7 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
|
||||
mca_btl_ofud_component.if_list = NULL;
|
||||
if (NULL != mca_btl_ofud_component.if_include &&
|
||||
NULL != mca_btl_ofud_component.if_exclude) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"specified include and exclude", true,
|
||||
mca_btl_ofud_component.if_include,
|
||||
mca_btl_ofud_component.if_exclude, NULL);
|
||||
@ -553,14 +554,14 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
|
||||
mca_btl_ofud_component.ud_btls = (mca_btl_ud_module_t*)
|
||||
malloc(sizeof(mca_btl_ud_module_t) * mca_btl_ofud_component.num_btls);
|
||||
if(NULL == mca_btl_ofud_component.ud_btls) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_OUT_OF_RESOURCE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
btls = (struct mca_btl_base_module_t**)
|
||||
malloc(mca_btl_ofud_component.num_btls * sizeof(mca_btl_ud_module_t*));
|
||||
if(NULL == btls) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_OUT_OF_RESOURCE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Sandia National Laboratories. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -125,14 +126,14 @@ mca_btl_ud_proc_t* mca_btl_ud_proc_create(ompi_proc_t* ompi_proc)
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
opal_output(0,
|
||||
"[%s:%d] ompi_modex_recv failed for peer %s",
|
||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
__FILE__,__LINE__,ORCA_NAME_PRINT(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(module_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if((size % sizeof(mca_btl_ud_addr_t)) != 0) {
|
||||
opal_output(0, "[%s:%d] invalid module address for peer %s",
|
||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
__FILE__,__LINE__,ORCA_NAME_PRINT(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(module_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -29,8 +30,8 @@
|
||||
#ifdef HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#endif
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/arch.h"
|
||||
@ -54,7 +55,7 @@
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/grdma/mpool_grdma.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
@ -142,14 +143,14 @@ void mca_btl_openib_show_init_error(const char *file, int line,
|
||||
}
|
||||
#endif
|
||||
|
||||
orte_show_help("help-mpi-btl-openib.txt", "init-fail-no-mem",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "init-fail-no-mem",
|
||||
true, orca_process_info_get_nodename(),
|
||||
file, line, func, dev, str_limit);
|
||||
|
||||
if (NULL != str_limit) free(str_limit);
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
|
||||
true, orca_process_info_get_nodename(),
|
||||
file, line, func, strerror(errno), errno, dev);
|
||||
}
|
||||
}
|
||||
@ -474,9 +475,9 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
|
||||
ompi_btl_openib_ini_values_t values;
|
||||
|
||||
if(mca_btl_openib_get_transport_type(openib_btl) != endpoint->rem_info.rem_transport_type) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"conflicting transport types", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
(openib_btl->device->ib_dev_attr).vendor_id,
|
||||
(openib_btl->device->ib_dev_attr).vendor_part_id,
|
||||
@ -495,9 +496,9 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
|
||||
|
||||
if (OMPI_SUCCESS != ret &&
|
||||
OMPI_ERR_NOT_FOUND != ret) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"error in device init", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(openib_btl->device->ib_dev));
|
||||
return ret;
|
||||
}
|
||||
@ -536,9 +537,9 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
|
||||
|
||||
if(0 != strcmp(mca_btl_openib_component.receive_queues,
|
||||
recv_qps)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"unsupported queues configuration", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
(openib_btl->device->ib_dev_attr).vendor_id,
|
||||
(openib_btl->device->ib_dev_attr).vendor_part_id,
|
||||
@ -558,9 +559,9 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
|
||||
if(NULL != values.receive_queues) {
|
||||
if(0 != strcmp(mca_btl_openib_component.receive_queues,
|
||||
values.receive_queues)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"unsupported queues configuration", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
(openib_btl->device->ib_dev_attr).vendor_id,
|
||||
(openib_btl->device->ib_dev_attr).vendor_part_id,
|
||||
@ -630,8 +631,8 @@ int mca_btl_openib_add_procs(
|
||||
|
||||
/* OOB, XOOB, and RDMACM do not support SELF comunication, so
|
||||
* mark the prco as unreachable by openib btl */
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields
|
||||
(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, &ompi_proc->proc_name)) {
|
||||
if (OPAL_EQUAL == orca_process_name_compare
|
||||
(ORCA_NAME_CMP_ALL, ORCA_PROC_MY_NAME, &ompi_proc->proc_name)) {
|
||||
continue;
|
||||
}
|
||||
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
|
||||
@ -1757,7 +1758,7 @@ int mca_btl_openib_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
/* Continue must reconstruct the routes (including modex), since we
|
||||
* have to tear down the devices completely. */
|
||||
orte_cr_continue_like_restart = true;
|
||||
orca_info_cr_continue_like_restart() = true;
|
||||
|
||||
/*
|
||||
* To keep the node from crashing we need to call ibv_close_device
|
||||
|
@ -3,6 +3,7 @@
|
||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -20,7 +21,7 @@
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "btl_openib.h"
|
||||
@ -348,15 +349,15 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
case IBV_EVENT_QP_ACCESS_ERR:
|
||||
case IBV_EVENT_PATH_MIG_ERR:
|
||||
case IBV_EVENT_SRQ_ERR:
|
||||
orte_show_help("help-mpi-btl-openib.txt", "of error event",
|
||||
true,orte_process_info.nodename, orte_process_info.pid,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "of error event",
|
||||
true,orca_process_info_get_nodename(), orca_process_info_get_pid(),
|
||||
event_type,
|
||||
openib_event_to_str((enum ibv_event_type)event_type),
|
||||
xrc_event ? "true" : "false");
|
||||
break;
|
||||
case IBV_EVENT_PORT_ERR:
|
||||
orte_show_help("help-mpi-btl-openib.txt", "of error event",
|
||||
true,orte_process_info.nodename, orte_process_info.pid,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "of error event",
|
||||
true,orca_process_info_get_nodename(), orca_process_info_get_pid(),
|
||||
event_type,
|
||||
openib_event_to_str((enum ibv_event_type)event_type),
|
||||
xrc_event ? "true" : "false");
|
||||
@ -385,8 +386,8 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
|
||||
break;
|
||||
default:
|
||||
orte_show_help("help-mpi-btl-openib.txt", "of unknown event",
|
||||
true,orte_process_info.nodename, orte_process_info.pid,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "of unknown event",
|
||||
true,orca_process_info_get_nodename(), orca_process_info_get_pid(),
|
||||
event_type, xrc_event ? "true" : "false");
|
||||
}
|
||||
ibv_ack_async_event(&event);
|
||||
|
@ -17,6 +17,7 @@
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -71,9 +72,7 @@ const char *ibv_get_sysfs_path(void);
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "opal_stdint.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
@ -604,8 +603,8 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
|
||||
openib_reg->mr = ibv_reg_mr(device->ib_pd, base, size, access_flag);
|
||||
|
||||
if (NULL == openib_reg->mr) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "mem-reg-fail",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "mem-reg-fail",
|
||||
true, orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(device->ib_dev),
|
||||
__func__, strerror(errno), errno);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
@ -697,8 +696,8 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
||||
size. */
|
||||
if (mca_btl_openib_component.gid_index >
|
||||
ib_port_attr->gid_tbl_len) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "gid index too large",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "gid index too large",
|
||||
true, orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(device->ib_dev), port_num,
|
||||
mca_btl_openib_component.gid_index,
|
||||
ib_port_attr->gid_tbl_len);
|
||||
@ -755,8 +754,8 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
||||
if(mca_btl_openib_component.ib_num_btls > 0 &&
|
||||
IB_DEFAULT_GID_PREFIX == subnet_id &&
|
||||
mca_btl_openib_component.warn_default_gid_prefix) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
|
||||
true, orte_process_info.nodename);
|
||||
orca_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
|
||||
true, orca_process_info_get_nodename());
|
||||
}
|
||||
|
||||
lmc = (1 << ib_port_attr->lmc);
|
||||
@ -779,7 +778,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
||||
} else if (0 == lmc % (mca_btl_openib_component.apm_lmc + 1)) {
|
||||
lmc_step = mca_btl_openib_component.apm_lmc + 1;
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "apm with wrong lmc",true,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "apm with wrong lmc",true,
|
||||
mca_btl_openib_component.apm_lmc, lmc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -787,7 +786,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
||||
if (mca_btl_openib_component.apm_lmc) {
|
||||
/* Disable apm and report warning */
|
||||
mca_btl_openib_component.apm_lmc = 0;
|
||||
orte_show_help("help-mpi-btl-openib.txt", "apm without lmc",true);
|
||||
orca_show_help("help-mpi-btl-openib.txt", "apm without lmc",true);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -1127,11 +1126,11 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
||||
*/
|
||||
if (!(device->ib_dev_attr.device_cap_flags & IBV_DEVICE_XRC) &&
|
||||
MCA_BTL_XRC_ENABLED) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"XRC on device without XRC support", true,
|
||||
mca_btl_openib_component.num_xrc_qps,
|
||||
ibv_get_device_name(device->ib_dev),
|
||||
orte_process_info.nodename);
|
||||
orca_process_info_get_nodename());
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -1436,9 +1435,9 @@ static int setup_qps(void)
|
||||
|
||||
queues = opal_argv_split(mca_btl_openib_component.receive_queues, ':');
|
||||
if (0 == opal_argv_count(queues)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"no qps in receive_queues", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
mca_btl_openib_component.receive_queues);
|
||||
ret = OMPI_ERROR;
|
||||
goto error;
|
||||
@ -1456,16 +1455,16 @@ static int setup_qps(void)
|
||||
#if HAVE_XRC
|
||||
num_xrc_qps++;
|
||||
#else
|
||||
orte_show_help("help-mpi-btl-openib.txt", "No XRC support", true,
|
||||
orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "No XRC support", true,
|
||||
orca_process_info_get_nodename(),
|
||||
mca_btl_openib_component.receive_queues);
|
||||
ret = OMPI_ERR_NOT_AVAILABLE;
|
||||
goto error;
|
||||
#endif
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid qp type in receive_queues", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
mca_btl_openib_component.receive_queues,
|
||||
queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
@ -1476,8 +1475,8 @@ static int setup_qps(void)
|
||||
/* Current XRC implementation can't used with other QP types - PP
|
||||
and SRQ */
|
||||
if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true,
|
||||
orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true,
|
||||
orca_process_info_get_nodename(),
|
||||
mca_btl_openib_component.receive_queues);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
@ -1485,8 +1484,8 @@ static int setup_qps(void)
|
||||
|
||||
/* Current XRC implementation can't used with btls_per_lid > 1 */
|
||||
if (num_xrc_qps > 0 && mca_btl_openib_component.btls_per_lid > 1) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID",
|
||||
true, orca_process_info_get_nodename(),
|
||||
mca_btl_openib_component.receive_queues, num_xrc_qps);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
@ -1511,9 +1510,9 @@ static int setup_qps(void)
|
||||
if ('P' == params[0][0]) {
|
||||
int32_t rd_win, rd_rsv;
|
||||
if (count < 3 || count > 6) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid pp qp specification", true,
|
||||
orte_process_info.nodename, queues[qp]);
|
||||
orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1536,15 +1535,15 @@ static int setup_qps(void)
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_win = rd_win;
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv = rd_rsv;
|
||||
if ((rd_num - rd_low) > rd_win) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "non optimal rd_win",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "non optimal rd_win",
|
||||
true, rd_win, rd_num - rd_low);
|
||||
}
|
||||
} else {
|
||||
int32_t sd_max, rd_init, srq_limit;
|
||||
if (count < 3 || count > 7) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid srq specification", true,
|
||||
orte_process_info.nodename, queues[qp]);
|
||||
orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1579,15 +1578,15 @@ static int setup_qps(void)
|
||||
}
|
||||
|
||||
if (rd_num < rd_init) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "rd_num must be >= rd_init",
|
||||
true, orte_process_info.nodename, queues[qp]);
|
||||
orca_show_help("help-mpi-btl-openib.txt", "rd_num must be >= rd_init",
|
||||
true, orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (rd_num < srq_limit) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "srq_limit must be > rd_num",
|
||||
true, orte_process_info.nodename, queues[qp]);
|
||||
orca_show_help("help-mpi-btl-openib.txt", "srq_limit must be > rd_num",
|
||||
true, orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1598,8 +1597,8 @@ static int setup_qps(void)
|
||||
}
|
||||
|
||||
if (rd_num <= rd_low) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low",
|
||||
true, orte_process_info.nodename, queues[qp]);
|
||||
orca_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low",
|
||||
true, orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1618,23 +1617,23 @@ static int setup_qps(void)
|
||||
mca_btl_openib_module.super.btl_eager_limit :
|
||||
mca_btl_openib_module.super.btl_max_send_size;
|
||||
if (max_qp_size < max_size_needed) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"biggest qp size is too small", true,
|
||||
orte_process_info.nodename, max_qp_size,
|
||||
orca_process_info_get_nodename(), max_qp_size,
|
||||
max_size_needed);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
} else if (max_qp_size > max_size_needed) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"biggest qp size is too big", true,
|
||||
orte_process_info.nodename, max_qp_size,
|
||||
orca_process_info_get_nodename(), max_qp_size,
|
||||
max_size_needed);
|
||||
}
|
||||
|
||||
if (mca_btl_openib_component.ib_free_list_max > 0 &&
|
||||
min_freelist_size > mca_btl_openib_component.ib_free_list_max) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "freelist too small", true,
|
||||
orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "freelist too small", true,
|
||||
orca_process_info_get_nodename(),
|
||||
mca_btl_openib_component.ib_free_list_max,
|
||||
min_freelist_size);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
@ -1718,9 +1717,9 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
warning that we're using default values (unless overridden
|
||||
that we don't want to see these warnings) */
|
||||
if (mca_btl_openib_component.warn_no_device_params_found) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"no device params found", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(device->ib_dev),
|
||||
device->ib_dev_attr.vendor_id,
|
||||
device->ib_dev_attr.vendor_part_id);
|
||||
@ -1826,8 +1825,8 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
cq = ibv_create_cq(device->ib_dev_context, 1, NULL, NULL, 0);
|
||||
#endif
|
||||
if (NULL == cq) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
|
||||
true, orca_process_info_get_nodename(),
|
||||
__FILE__, __LINE__, "ibv_create_cq",
|
||||
strerror(errno), errno,
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
@ -1879,7 +1878,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
/* Eager RDMA is not currently supported with progress threads */
|
||||
if (device->use_eager_rdma && OMPI_ENABLE_PROGRESS_THREADS) {
|
||||
device->use_eager_rdma = 0;
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"eager RDMA and progress threads", true);
|
||||
}
|
||||
|
||||
@ -1965,7 +1964,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
if (device->btls > 0) {
|
||||
/* if apm was enabled it should be > 1 */
|
||||
if (1 == mca_btl_openib_component.apm_ports) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"apm not enough ports", true);
|
||||
mca_btl_openib_component.apm_ports = 0;
|
||||
}
|
||||
@ -2224,10 +2223,10 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
if (NULL != values.receive_queues) {
|
||||
if (0 != strcmp(values.receive_queues,
|
||||
mca_btl_openib_component.receive_queues)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"locally conflicting receive_queues", true,
|
||||
opal_install_dirs.pkgdatadir,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(receive_queues_device->ib_dev),
|
||||
receive_queues_device->ib_dev_attr.vendor_id,
|
||||
receive_queues_device->ib_dev_attr.vendor_part_id,
|
||||
@ -2248,10 +2247,10 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
device's INI file, we must error. */
|
||||
else if (BTL_OPENIB_RQ_SOURCE_DEVICE_INI ==
|
||||
mca_btl_openib_component.receive_queues_source) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"locally conflicting receive_queues", true,
|
||||
opal_install_dirs.pkgdatadir,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(receive_queues_device->ib_dev),
|
||||
receive_queues_device->ib_dev_attr.vendor_id,
|
||||
receive_queues_device->ib_dev_attr.vendor_part_id,
|
||||
@ -2287,9 +2286,9 @@ error:
|
||||
}
|
||||
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"error in device init", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
}
|
||||
|
||||
@ -2548,7 +2547,7 @@ sort_devs_by_distance(struct ibv_device **ib_devs, int count)
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
devs[i].ib_dev = ib_devs[i];
|
||||
if (OPAL_HAVE_HWLOC && orte_proc_is_bound) {
|
||||
if (OPAL_HAVE_HWLOC && orca_process_info_is_bound()) {
|
||||
/* If this process is bound to one or more PUs, we can get
|
||||
an accurate distance. */
|
||||
devs[i].distance = get_ib_dev_distance(ib_devs[i]);
|
||||
@ -2614,7 +2613,7 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
}
|
||||
|
||||
#ifndef __WINDOWS__
|
||||
seedv[0] = ORTE_PROC_MY_NAME->vpid;
|
||||
seedv[0] = orca_process_info_get_vpid(ORCA_PROC_MY_NAME);
|
||||
seedv[1] = opal_timer_base_get_cycles();
|
||||
seedv[2] = opal_timer_base_get_cycles();
|
||||
seed48(seedv);
|
||||
@ -2646,9 +2645,9 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
#if !OPAL_HAVE_THREADS
|
||||
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
|
||||
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"ptmalloc2 with no threads", true,
|
||||
orte_process_info.nodename);
|
||||
orca_process_info_get_nodename());
|
||||
goto no_btls;
|
||||
}
|
||||
#endif
|
||||
@ -2772,9 +2771,9 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
couldn't provide it. So print an error and deactivate
|
||||
this BTL. */
|
||||
if (mca_btl_openib_component.want_fork_support > 0) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"ibv_fork_init fail", true,
|
||||
orte_process_info.nodename);
|
||||
orca_process_info_get_nodename());
|
||||
goto no_btls;
|
||||
}
|
||||
}
|
||||
@ -2796,7 +2795,7 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
list_count++;
|
||||
|
||||
if (list_count > 1) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"specified include and exclude", true,
|
||||
NULL == mca_btl_openib_component.if_include ?
|
||||
"<not specified>" : mca_btl_openib_component.if_include,
|
||||
@ -2864,7 +2863,7 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
continue;
|
||||
}
|
||||
#else
|
||||
orte_show_help("help-mpi-btl-openib.txt", "no iwarp support",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "no iwarp support",
|
||||
true);
|
||||
#endif
|
||||
break;
|
||||
@ -2882,8 +2881,8 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
}
|
||||
free(dev_sorted);
|
||||
if (!found) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "no devices right type",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "no devices right type",
|
||||
true, orca_process_info_get_nodename(),
|
||||
((BTL_OPENIB_DT_IB == mca_btl_openib_component.device_type) ?
|
||||
"InfiniBand" :
|
||||
(BTL_OPENIB_DT_IWARP == mca_btl_openib_component.device_type) ?
|
||||
@ -2899,16 +2898,16 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
if (0 != opal_argv_count(mca_btl_openib_component.if_list) &&
|
||||
mca_btl_openib_component.warn_nonexistent_if) {
|
||||
char *str = opal_argv_join(mca_btl_openib_component.if_list, ',');
|
||||
orte_show_help("help-mpi-btl-openib.txt", "nonexistent port",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "nonexistent port",
|
||||
true, orca_process_info_get_nodename(),
|
||||
((NULL != mca_btl_openib_component.if_include) ?
|
||||
"in" : "ex"), str);
|
||||
free(str);
|
||||
}
|
||||
|
||||
if(0 == mca_btl_openib_component.ib_num_btls) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"no active ports found", true, orte_process_info.nodename);
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"no active ports found", true, orca_process_info_get_nodename());
|
||||
goto no_btls;
|
||||
}
|
||||
|
||||
@ -3019,9 +3018,9 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
/* Do finial init on device */
|
||||
ret = prepare_device_for_use(device);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"error in device init", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
goto no_btls;
|
||||
}
|
||||
@ -3578,16 +3577,16 @@ error:
|
||||
ibv_get_device_name(endpoint->qps[qp].qp->lcl_qp->context->device);
|
||||
|
||||
if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
BTL_OPENIB_QP_TYPE_PP(qp) ?
|
||||
"pp rnr retry exceeded" :
|
||||
"srq rnr retry exceeded", true,
|
||||
orte_process_info.nodename, device_name,
|
||||
orca_process_info_get_nodename(), device_name,
|
||||
peer_hostname);
|
||||
} else if (IBV_WC_RETRY_EXC_ERR == wc->status) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"pp retry exceeded", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
device_name, peer_hostname);
|
||||
}
|
||||
}
|
||||
|
@ -17,7 +17,7 @@
|
||||
* Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2011 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -37,7 +37,7 @@
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/types.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
@ -1042,9 +1042,9 @@ void *mca_btl_openib_endpoint_invoke_error(void *context)
|
||||
|
||||
/* If we didn't find a BTL, then just bail :-( */
|
||||
if (NULL == btl || NULL == btl->error_cb) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"cannot raise btl error", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
__FILE__, __LINE__);
|
||||
exit(1);
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -372,14 +373,14 @@ void btl_openib_handle_failover_control_messages(mca_btl_openib_control_header_t
|
||||
opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
|
||||
"IB: rank=%d, control message (remote=%d), "
|
||||
"moved local head by one (new=%d)",
|
||||
ORTE_PROC_MY_NAME->vpid,
|
||||
orca_process_info_get_vpid(ORCA_PROC_MY_NAME),
|
||||
newep->endpoint_proc->proc_ompi->proc_name.vpid,
|
||||
newep->eager_rdma_local.head);
|
||||
} else {
|
||||
opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
|
||||
"IB: rank=%d, control message (remote=%d), "
|
||||
"did not move local head by one (still=%d)",
|
||||
ORTE_PROC_MY_NAME->vpid,
|
||||
orca_process_info_get_vpid(ORCA_PROC_MY_NAME),
|
||||
newep->endpoint_proc->proc_ompi->proc_name.vpid,
|
||||
newep->eager_rdma_local.head);
|
||||
}
|
||||
@ -684,7 +685,7 @@ static void mca_btl_openib_endpoint_notify(mca_btl_base_endpoint_t* endpoint, ui
|
||||
bc_hdr->control.type = type;
|
||||
bc_hdr->lid = endpoint->endpoint_btl->port_info.lid;
|
||||
bc_hdr->subnet_id = endpoint->endpoint_btl->port_info.subnet_id;
|
||||
bc_hdr->vpid = ORTE_PROC_MY_NAME->vpid;
|
||||
bc_hdr->vpid = orca_process_info_get_vpid(ORCA_PROC_MY_NAME);
|
||||
bc_hdr->index = index;
|
||||
|
||||
if(newep->nbo) {
|
||||
@ -739,7 +740,7 @@ void mca_btl_openib_dump_all_local_rdma_frags(mca_btl_openib_device_t *device) {
|
||||
mca_btl_openib_endpoint_t* endpoint;
|
||||
|
||||
c = device->eager_rdma_buffers_count;
|
||||
opal_output(0, "rank=%d, device=%s", ORTE_PROC_MY_NAME->vpid, device->ib_dev->name);
|
||||
opal_output(0, "rank=%d, device=%s", orca_process_info_get_vpid(ORCA_PROC_MY_NAME), device->ib_dev->name);
|
||||
|
||||
for(i = 0; i < c; i++) {
|
||||
endpoint = device->eager_rdma_buffers[i];
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2008 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -30,7 +31,7 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "btl_openib.h"
|
||||
@ -247,7 +248,7 @@ static int parse_file(char *filename)
|
||||
ini_filename = filename;
|
||||
btl_openib_ini_yyin = fopen(filename, "r");
|
||||
if (NULL == btl_openib_ini_yyin) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "ini file:file not found",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "ini file:file not found",
|
||||
true, filename);
|
||||
ret = OMPI_ERR_NOT_FOUND;
|
||||
goto cleanup;
|
||||
@ -424,7 +425,7 @@ static int parse_line(parsed_section_values_t *sv)
|
||||
/* Have no idea what this parameter is. Not an error -- just
|
||||
ignore it */
|
||||
if (!showed_unknown_field_warning) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"ini file:unknown field", true,
|
||||
ini_filename, btl_openib_ini_yynewlines,
|
||||
key_buffer);
|
||||
@ -693,7 +694,7 @@ static inline void show_help(const char *topic)
|
||||
if (0 == strcmp("\n", btl_openib_ini_yytext)) {
|
||||
btl_openib_ini_yytext = "<end of line>";
|
||||
}
|
||||
orte_show_help("help-mpi-btl-openib.txt", topic, true,
|
||||
orca_show_help("help-mpi-btl-openib.txt", topic, true,
|
||||
ini_filename, btl_openib_ini_yynewlines,
|
||||
btl_openib_ini_yytext);
|
||||
btl_openib_ini_yytext = save;
|
||||
|
@ -2,6 +2,8 @@
|
||||
* Copyright (c) 2008 Chelsio, Inc. All rights reserved.
|
||||
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
@ -22,7 +24,7 @@
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/if.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "connect/connect.h"
|
||||
#endif
|
||||
@ -197,9 +199,9 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
|
||||
if (NULL == temp || NULL == temp[0] || NULL == temp[1] ||
|
||||
NULL != temp[2]) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "include",
|
||||
orte_process_info.nodename, list[i],
|
||||
orca_process_info_get_nodename(), list[i],
|
||||
"Invalid specification (missing \"/\")");
|
||||
if (NULL != temp) {
|
||||
opal_argv_free(temp);
|
||||
@ -208,9 +210,9 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
}
|
||||
|
||||
if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "include",
|
||||
orte_process_info.nodename, list[i],
|
||||
orca_process_info_get_nodename(), list[i],
|
||||
"Invalid specification (inet_pton() failed)");
|
||||
opal_argv_free(temp);
|
||||
continue;
|
||||
@ -239,9 +241,9 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
|
||||
if (NULL == temp || NULL == temp[0] || NULL == temp[1] ||
|
||||
NULL != temp[2]) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "exclude",
|
||||
orte_process_info.nodename, list[i],
|
||||
orca_process_info_get_nodename(), list[i],
|
||||
"Invalid specification (missing \"/\")");
|
||||
if (NULL != temp) {
|
||||
opal_argv_free(temp);
|
||||
@ -250,9 +252,9 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
}
|
||||
|
||||
if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "exclude",
|
||||
orte_process_info.nodename, list[i],
|
||||
orca_process_info_get_nodename(), list[i],
|
||||
"Invalid specification (inet_pton() failed)");
|
||||
opal_argv_free(temp);
|
||||
continue;
|
||||
|
@ -15,6 +15,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -30,7 +31,7 @@
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_mca.h"
|
||||
#include "btl_openib_ini.h"
|
||||
@ -179,9 +180,9 @@ int btl_openib_register_mca_params(void)
|
||||
mca_btl_openib_component.want_fork_support = ival;
|
||||
#else
|
||||
if (0 != ival) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"ibv_fork requested but not supported", true,
|
||||
orte_process_info.nodename);
|
||||
orca_process_info_get_nodename());
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
#endif
|
||||
@ -210,9 +211,9 @@ int btl_openib_register_mca_params(void)
|
||||
} else if (0 == strcasecmp(str, "all")) {
|
||||
mca_btl_openib_component.device_type = BTL_OPENIB_DT_ALL;
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
orca_show_help("help-mpi-btl-openib.txt",
|
||||
"ibv_fork requested but not supported", true,
|
||||
orte_process_info.nodename);
|
||||
orca_process_info_get_nodename());
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
free(str);
|
||||
@ -297,7 +298,7 @@ int btl_openib_register_mca_params(void)
|
||||
CHECK(reg_int("mtu", "ib_mtu", msg, IBV_MTU_1024, &ival, 0));
|
||||
free(msg);
|
||||
if (ival < IBV_MTU_1024 || ival > IBV_MTU_4096) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "invalid value for btl_openib_ib_mtu",
|
||||
"btl_openib_ib_mtu reset to 1024");
|
||||
mca_btl_openib_component.ib_mtu = IBV_MTU_1024;
|
||||
@ -310,12 +311,12 @@ int btl_openib_register_mca_params(void)
|
||||
"(must be >= 0 and <= 31)",
|
||||
25, &ival, 0));
|
||||
if (ival > 31) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_min_rnr_timer > 31",
|
||||
"btl_openib_ib_min_rnr_timer reset to 31");
|
||||
ival = 31;
|
||||
} else if (ival < 0){
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_min_rnr_timer < 0",
|
||||
"btl_openib_ib_min_rnr_timer reset to 0");
|
||||
ival = 0;
|
||||
@ -327,12 +328,12 @@ int btl_openib_register_mca_params(void)
|
||||
"(must be >= 0 and <= 31)",
|
||||
20, &ival, 0));
|
||||
if (ival > 31) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_timeout > 31",
|
||||
"btl_openib_ib_timeout reset to 31");
|
||||
ival = 31;
|
||||
} else if (ival < 0) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_timeout < 0",
|
||||
"btl_openib_ib_timeout reset to 0");
|
||||
ival = 0;
|
||||
@ -344,12 +345,12 @@ int btl_openib_register_mca_params(void)
|
||||
"(must be >= 0 and <= 7)",
|
||||
7, &ival, 0));
|
||||
if (ival > 7) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_retry_count > 7",
|
||||
"btl_openib_ib_retry_count reset to 7");
|
||||
ival = 7;
|
||||
} else if (ival < 0) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_retry_count < 0",
|
||||
"btl_openib_ib_retry_count reset to 0");
|
||||
ival = 0;
|
||||
@ -364,12 +365,12 @@ int btl_openib_register_mca_params(void)
|
||||
"(must be >= 0 and <= 7; 7 = \"infinite\")",
|
||||
7, &ival, 0));
|
||||
if (ival > 7) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_rnr_retry > 7",
|
||||
"btl_openib_ib_rnr_retry reset to 7");
|
||||
ival = 7;
|
||||
} else if (ival < 0) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_rnr_retry < 0",
|
||||
"btl_openib_ib_rnr_retry reset to 0");
|
||||
ival = 0;
|
||||
@ -386,12 +387,12 @@ int btl_openib_register_mca_params(void)
|
||||
"(must be >= 0 and <= 15)",
|
||||
0, &ival, 0));
|
||||
if (ival > 15) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_service_level > 15",
|
||||
"btl_openib_ib_service_level reset to 15");
|
||||
ival = 15;
|
||||
} else if (ival < 0) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_ib_service_level < 0",
|
||||
"btl_openib_ib_service_level reset to 0");
|
||||
ival = 0;
|
||||
@ -494,8 +495,8 @@ int btl_openib_register_mca_params(void)
|
||||
"(must be > 0 and power of two)",
|
||||
64, &ival, REGINT_GE_ZERO));
|
||||
if(ival <= 1 || (ival & (ival - 1))) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
|
||||
true, ival, orte_process_info.nodename, 64);
|
||||
orca_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
|
||||
true, ival, orca_process_info_get_nodename(), 64);
|
||||
mca_btl_openib_component.buffer_alignment = 64;
|
||||
} else {
|
||||
mca_btl_openib_component.buffer_alignment = (uint32_t) ival;
|
||||
@ -618,7 +619,7 @@ int btl_openib_register_mca_params(void)
|
||||
if (mca_btl_openib_component.use_memalign != 32
|
||||
&& mca_btl_openib_component.use_memalign != 64
|
||||
&& mca_btl_openib_component.use_memalign != 0){
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "Wrong btl_openib_memalign parameter value. Allowed values: 64, 32, 0.",
|
||||
"btl_openib_memalign is reset to 32");
|
||||
mca_btl_openib_component.use_memalign = 32;
|
||||
@ -631,7 +632,7 @@ int btl_openib_register_mca_params(void)
|
||||
&ival,
|
||||
REGINT_GE_ZERO);
|
||||
if (ival < 0){
|
||||
orte_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
|
||||
true, "btl_openib_memalign_threshold must be positive",
|
||||
"btl_openib_memalign_threshold is reset to btl_openib_eager_limit");
|
||||
ival = mca_btl_openib_component.eager_limit;
|
||||
|
@ -11,6 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -154,7 +155,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
BTL_ERROR(("[%s:%d] ompi_modex_recv failed for peer %s",
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
ORCA_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
OBJ_RELEASE(module_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -44,7 +45,7 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
|
||||
dev_name = ibv_get_device_name(device->ib_dev);
|
||||
len = asprintf(&xrc_file_name,
|
||||
"%s"OPAL_PATH_SEP"openib_xrc_domain_%s",
|
||||
orte_process_info.job_session_dir, dev_name);
|
||||
orca_process_info_get_job_session_dir(), dev_name);
|
||||
if (0 > len) {
|
||||
BTL_ERROR(("Failed to allocate memomry for XRC file name: %s\n",
|
||||
strerror(errno)));
|
||||
@ -111,7 +112,7 @@ static void ib_address_destructor(ib_address_t *ib_addr)
|
||||
OBJ_DESTRUCT(&ib_addr->pending_ep);
|
||||
}
|
||||
|
||||
static int ib_address_init(ib_address_t *ib_addr, uint16_t lid, uint64_t s_id, orte_jobid_t ep_jobid)
|
||||
static int ib_address_init(ib_address_t *ib_addr, uint16_t lid, uint64_t s_id, orca_jobid_t ep_jobid)
|
||||
{
|
||||
ib_addr->key = malloc(SIZE_OF3(s_id, lid, ep_jobid));
|
||||
if (NULL == ib_addr->key) {
|
||||
@ -136,7 +137,7 @@ static int ib_address_init(ib_address_t *ib_addr, uint16_t lid, uint64_t s_id, o
|
||||
* Before call to this function you need to protect with
|
||||
*/
|
||||
int mca_btl_openib_ib_address_add_new (uint16_t lid, uint64_t s_id,
|
||||
orte_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep)
|
||||
orca_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep)
|
||||
{
|
||||
void *tmp;
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -44,6 +45,6 @@ typedef struct ib_address_t ib_address_t;
|
||||
int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device);
|
||||
int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device);
|
||||
int mca_btl_openib_ib_address_add_new (uint16_t lid, uint64_t s_id,
|
||||
orte_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep);
|
||||
orca_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep);
|
||||
|
||||
#endif
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2007 Mellanox Technologies, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,7 +27,7 @@
|
||||
#include "connect/btl_openib_connect_udcm.h"
|
||||
#endif
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
@ -121,9 +121,9 @@ int ompi_btl_openib_connect_base_register(void)
|
||||
}
|
||||
}
|
||||
if (NULL == all[i]) {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"cpc name not found", true,
|
||||
"include", orte_process_info.nodename,
|
||||
"include", orca_process_info_get_nodename(),
|
||||
"include", cpc_include, temp[j],
|
||||
all_cpc_names);
|
||||
opal_argv_free(temp);
|
||||
@ -147,9 +147,9 @@ int ompi_btl_openib_connect_base_register(void)
|
||||
}
|
||||
}
|
||||
if (NULL == all[i]) {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"cpc name not found", true,
|
||||
"exclude", orte_process_info.nodename,
|
||||
"exclude", orca_process_info_get_nodename(),
|
||||
"exclude", cpc_exclude, temp[j],
|
||||
all_cpc_names);
|
||||
opal_argv_free(temp);
|
||||
@ -292,9 +292,9 @@ int ompi_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *
|
||||
|
||||
/* If we got an empty array, then no CPCs were eligible. Doh! */
|
||||
if (0 == cpc_index) {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"no cpcs for port", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
btl->port_num, msg);
|
||||
free(cpcs);
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2008-2011 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2009-2011 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,14 +27,11 @@
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal_stdint.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
|
||||
#include "btl_openib.h"
|
||||
@ -77,11 +74,11 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
|
||||
static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
uint8_t message_type);
|
||||
|
||||
static void rml_send_cb(int status, orte_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
static void rml_send_cb(int status, orca_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata);
|
||||
static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
static void rml_recv_cb(int status, orca_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata);
|
||||
|
||||
/*
|
||||
@ -149,12 +146,12 @@ static int oob_component_query(mca_btl_openib_module_t *btl,
|
||||
ensure to only post it *once*, because another btl may have
|
||||
come in before this and already posted it. */
|
||||
if (!rml_recv_posted) {
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
OMPI_RML_TAG_OPENIB,
|
||||
ORTE_RML_PERSISTENT,
|
||||
rml_recv_cb,
|
||||
NULL);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
rc = orca_oob_recv_buffer_nb(ORCA_NAME_WILDCARD,
|
||||
OMPI_RML_TAG_OPENIB,
|
||||
ORCA_OOB_PERSISTENT,
|
||||
rml_recv_cb,
|
||||
NULL);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: oob CPC system error %d (%s)",
|
||||
rc, opal_strerror(rc));
|
||||
@ -165,7 +162,7 @@ static int oob_component_query(mca_btl_openib_module_t *btl,
|
||||
|
||||
*cpc = (ompi_btl_openib_connect_base_module_t *) malloc(sizeof(ompi_btl_openib_connect_base_module_t));
|
||||
if (NULL == *cpc) {
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
|
||||
orca_oob_recv_cancel(ORCA_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
|
||||
rml_recv_posted = false;
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: oob CPC system error (malloc failed)");
|
||||
@ -221,7 +218,7 @@ static int oob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
|
||||
static int oob_component_finalize(void)
|
||||
{
|
||||
if (rml_recv_posted) {
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
|
||||
orca_oob_recv_cancel(ORCA_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
|
||||
rml_recv_posted = false;
|
||||
}
|
||||
#if (ENABLE_DYNAMIC_SL)
|
||||
@ -486,9 +483,9 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
|
||||
my_qp = ibv_create_qp(openib_btl->device->ib_pd, &init_attr);
|
||||
|
||||
if (NULL == my_qp) {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"ibv_create_qp failed", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
"Reliable connected (RC)");
|
||||
return OMPI_ERROR;
|
||||
@ -497,8 +494,8 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
|
||||
|
||||
if (init_attr.cap.max_inline_data < req_inline) {
|
||||
endpoint->qps[qp].ib_inline_max = init_attr.cap.max_inline_data;
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", true, orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num,
|
||||
req_inline, init_attr.cap.max_inline_data);
|
||||
@ -539,7 +536,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
int rc;
|
||||
|
||||
if (NULL == buffer) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_OUT_OF_RESOURCE);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -547,14 +544,14 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8));
|
||||
rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64));
|
||||
rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -565,13 +562,13 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
&endpoint->rem_info.rem_qps[0].rem_qp_num, 1,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
|
||||
rc = opal_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
@ -584,14 +581,14 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num,
|
||||
1, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
|
||||
rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
@ -599,30 +596,30 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
|
||||
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
|
||||
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->device->mtu, 1,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
|
||||
rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* send to remote endpoint */
|
||||
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name,
|
||||
rc = orca_oob_send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name,
|
||||
buffer, OMPI_RML_TAG_OPENIB, 0,
|
||||
rml_send_cb, NULL);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("Sent QP Info, LID = %d, SUBNET = %" PRIx64 "\n",
|
||||
@ -637,8 +634,8 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
* Callback when we have finished RML sending the connect data to a
|
||||
* remote peer
|
||||
*/
|
||||
static void rml_send_cb(int status, orte_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
static void rml_send_cb(int status, orca_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
@ -650,8 +647,8 @@ static void rml_send_cb(int status, orte_process_name_t* endpoint,
|
||||
* and if this endpoint is trying to connect, reply with our QP info,
|
||||
* otherwise try to modify QP's and establish reliable connection
|
||||
*/
|
||||
static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
static void rml_recv_cb(int status, orca_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
mca_btl_openib_proc_t *ib_proc;
|
||||
@ -674,16 +671,16 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
our door */
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8));
|
||||
rc = opal_dss.unpack(buffer, &message_type, &cnt, OPAL_UINT8);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, OPAL_UINT64);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -691,15 +688,15 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
if (ENDPOINT_CONNECT_REQUEST != message_type) {
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &lcl_qp, &cnt, OPAL_UINT32);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
|
||||
rc = opal_dss.unpack(buffer, &lcl_lid, &cnt, OPAL_UINT16);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -716,16 +713,16 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt,
|
||||
OPAL_UINT32);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_psn, &cnt,
|
||||
OPAL_UINT32);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -733,22 +730,22 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_lid, &cnt, OPAL_UINT16);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, OPAL_UINT32);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_index, &cnt, OPAL_UINT32);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -758,8 +755,9 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
rem_info.rem_lid,
|
||||
rem_info.rem_subnet_id));
|
||||
|
||||
master = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME,
|
||||
process_name) > 0 ? true : false;
|
||||
master = orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
ORCA_PROC_MY_NAME,
|
||||
process_name) > 0 ? true : false;
|
||||
|
||||
/* Need to protect the ib_procs list */
|
||||
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
|
||||
@ -771,7 +769,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
|
||||
bool found = false;
|
||||
|
||||
if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
if (OPAL_EQUAL != orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&ib_proc->proc_ompi->proc_name, process_name)) {
|
||||
continue;
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -48,7 +48,7 @@
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "btl_openib_fd.h"
|
||||
#include "btl_openib_proc.h"
|
||||
@ -247,7 +247,7 @@ static void rdmacm_component_register(void)
|
||||
if (value >= 0 && value < 65536) {
|
||||
rdmacm_port = (uint16_t) value;
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"illegal tcp port", true, value);
|
||||
}
|
||||
|
||||
@ -258,7 +258,7 @@ static void rdmacm_component_register(void)
|
||||
if (value > 0) {
|
||||
rdmacm_resolve_timeout = value;
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"illegal timeout", true, value);
|
||||
}
|
||||
|
||||
@ -269,7 +269,7 @@ static void rdmacm_component_register(void)
|
||||
if (value > 0) {
|
||||
rdmacm_resolve_max_retry_count = value;
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"illegal retry count", true, value);
|
||||
}
|
||||
|
||||
@ -453,9 +453,9 @@ static int rdmacm_setup_qp(rdmacm_contents_t *contents,
|
||||
endpoint->qps[qpnum].credit_frag = NULL;
|
||||
if (attr.cap.max_inline_data < req_inline) {
|
||||
endpoint->qps[qpnum].ib_inline_max = attr.cap.max_inline_data;
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(contents->openib_btl->device->ib_dev),
|
||||
contents->openib_btl->port_num,
|
||||
req_inline, attr.cap.max_inline_data);
|
||||
@ -753,16 +753,16 @@ static void *show_help_cant_find_endpoint(void *context)
|
||||
|
||||
if (NULL != c) {
|
||||
msg = stringify(c->peer_ip_addr);
|
||||
orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"could not find matching endpoint", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
c->device_name,
|
||||
c->peer_tcp_port);
|
||||
free(msg);
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"could not find matching endpoint", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
"<unknown>", "<unknown>", -1);
|
||||
}
|
||||
free(context);
|
||||
@ -1463,9 +1463,9 @@ static void *show_help_rdmacm_event_error(void *c)
|
||||
id_context_t *context = (id_context_t*) event->id->context;
|
||||
|
||||
if (RDMA_CM_EVENT_DEVICE_REMOVAL == event->event) {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"rdma cm device removal", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(event->id->verbs->device));
|
||||
} else {
|
||||
const char *device = "Unknown";
|
||||
@ -1474,9 +1474,9 @@ static void *show_help_rdmacm_event_error(void *c)
|
||||
NULL != event->id->verbs->device) {
|
||||
device = ibv_get_device_name(event->id->verbs->device);
|
||||
}
|
||||
orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"rdma cm event error", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
device,
|
||||
rdma_event_str(event->event),
|
||||
context->endpoint->endpoint_proc->proc_ompi->proc_hostname);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -9,7 +9,7 @@
|
||||
*/
|
||||
|
||||
#include "btl_openib.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "connect/btl_openib_connect_sl.h"
|
||||
#include <infiniband/iba/ib_types.h>
|
||||
@ -108,8 +108,8 @@ static int init_ud_qp(struct ibv_context *context_arg,
|
||||
cache->cq = ibv_create_cq(cache->context, 4, NULL, NULL, 0);
|
||||
if (NULL == cache->cq) {
|
||||
BTL_ERROR(("error creating cq, errno says %s", strerror(errno)));
|
||||
orte_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
|
||||
true, orca_process_info_get_nodename(),
|
||||
__FILE__, __LINE__, "ibv_create_cq",
|
||||
strerror(errno), errno,
|
||||
ibv_get_device_name(context_arg->device));
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All
|
||||
* rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -70,8 +70,7 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal_stdint.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "btl_openib_endpoint.h"
|
||||
#include "btl_openib_proc.h"
|
||||
@ -1052,8 +1051,9 @@ static int udcm_create_sync_qp (mca_btl_base_endpoint_t *lcl_ep)
|
||||
|
||||
udep->sync_qp = ibv_create_qp(m->btl->device->ib_pd, &init_attr);
|
||||
if (NULL == udep->sync_qp) {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"ibv_create_qp failed", true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"ibv_create_qp failed", true,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(m->btl->device->ib_dev),
|
||||
"Reliable connected (RC)");
|
||||
|
||||
@ -1166,8 +1166,9 @@ static int udcm_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ep,
|
||||
lcl_ep->qps[qp].qp->lcl_qp = ibv_create_qp(m->btl->device->ib_pd,
|
||||
&init_attr);
|
||||
if (NULL == lcl_ep->qps[qp].qp->lcl_qp) {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"ibv_create_qp failed", true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"ibv_create_qp failed", true,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(m->btl->device->ib_dev),
|
||||
"Reliable connected (RC)");
|
||||
|
||||
@ -1176,8 +1177,9 @@ static int udcm_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ep,
|
||||
|
||||
if (init_attr.cap.max_inline_data < req_inline) {
|
||||
lcl_ep->qps[qp].ib_inline_max = init_attr.cap.max_inline_data;
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", true,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(m->btl->device->ib_dev),
|
||||
m->btl->port_num, req_inline,
|
||||
init_attr.cap.max_inline_data);
|
||||
|
@ -7,7 +7,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -21,11 +21,9 @@
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
|
||||
#include "btl_openib.h"
|
||||
@ -34,7 +32,6 @@
|
||||
#include "btl_openib_xrc.h"
|
||||
#include "btl_openib_async.h"
|
||||
#include "connect/connect.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#if (ENABLE_DYNAMIC_SL)
|
||||
#include "connect/btl_openib_connect_sl.h"
|
||||
#endif
|
||||
@ -94,9 +91,9 @@ static int xoob_priority = 60;
|
||||
* Callback when we have finished RML sending the connect data to a
|
||||
* remote peer
|
||||
*/
|
||||
static void xoob_rml_send_cb(int status, orte_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
static void xoob_rml_send_cb(int status, orca_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
}
|
||||
@ -111,7 +108,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8));
|
||||
rc = opal_dss.unpack(buffer, message_type, &cnt, OPAL_UINT8);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack Message type = %d\n", *message_type));
|
||||
@ -119,7 +116,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64));
|
||||
rc = opal_dss.unpack(buffer, &info->rem_subnet_id, &cnt, OPAL_UINT64);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack sid = %" PRIx64 "\n", info->rem_subnet_id));
|
||||
@ -127,7 +124,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
|
||||
rc = opal_dss.unpack(buffer, &info->rem_lid, &cnt, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack lid = %d", info->rem_lid));
|
||||
@ -141,7 +138,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack remote qp = %x", info->rem_qps->rem_qp_num));
|
||||
@ -150,7 +147,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
rc = opal_dss.unpack(buffer, &info->rem_qps->rem_psn, &cnt,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack remote psn = %d", info->rem_qps->rem_psn));
|
||||
@ -158,7 +155,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &info->rem_mtu, &cnt, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack remote mtu = %d", info->rem_mtu));
|
||||
@ -170,7 +167,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
|
||||
rc = opal_dss.unpack(buffer, lid, &cnt, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack requested lid = %d", *lid));
|
||||
@ -183,7 +180,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack requested qp = %x", info->rem_qps->rem_qp_num));
|
||||
@ -194,7 +191,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &info->rem_index, &cnt, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack remote index = %d", info->rem_index));
|
||||
@ -203,7 +200,7 @@ static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &info->rem_srqs[srq].rem_srq_num, &cnt, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
BTL_VERBOSE(("Recv unpack remote index srq num[%d]= %d", srq, info->rem_srqs[srq].rem_srq_num));
|
||||
@ -222,7 +219,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
int rc, srq;
|
||||
|
||||
if (NULL == buffer) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_OUT_OF_RESOURCE);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -236,7 +233,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8));
|
||||
rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -244,7 +241,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64));
|
||||
rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -252,7 +249,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
|
||||
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -279,14 +276,14 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
|
||||
rc = opal_dss.pack(buffer, &qp_num, 1, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("Send pack lpsn = %d", psn));
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
|
||||
rc = opal_dss.pack(buffer, &psn, 1, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -295,7 +292,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->device->mtu, 1,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
@ -312,7 +309,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
|
||||
rc = opal_dss.pack(buffer, &endpoint->ib_addr->lid, 1, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
@ -325,7 +322,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
rc = opal_dss.pack(buffer, &endpoint->ib_addr->remote_xrc_rcv_qp_num,
|
||||
1, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
@ -341,7 +338,7 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
|
||||
rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* on response we add all SRQ numbers */
|
||||
@ -351,18 +348,18 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num,
|
||||
1, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* send to remote endpoint */
|
||||
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name,
|
||||
rc = orca_oob_send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name,
|
||||
buffer, OMPI_RML_TAG_XOPENIB, 0,
|
||||
xoob_rml_send_cb, NULL);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -414,9 +411,9 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
|
||||
qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
|
||||
*qp = ibv_create_qp(openib_btl->device->ib_pd, &qp_init_attr);
|
||||
if (NULL == *qp) {
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"ibv_create_qp failed", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
"Reliable connected (XRC)");
|
||||
return OMPI_ERROR;
|
||||
@ -424,8 +421,8 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
|
||||
|
||||
if (qp_init_attr.cap.max_inline_data < req_inline) {
|
||||
endpoint->qps[0].ib_inline_max = qp_init_attr.cap.max_inline_data;
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", orca_process_info_get_nodename,
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num,
|
||||
req_inline, qp_init_attr.cap.max_inline_data);
|
||||
@ -689,7 +686,7 @@ static int xoob_reply_first_connect(mca_btl_openib_endpoint_t *endpoint,
|
||||
}
|
||||
|
||||
/* Find endpoint for specific subnet/lid/message */
|
||||
static mca_btl_openib_endpoint_t* xoob_find_endpoint(orte_process_name_t* process_name,
|
||||
static mca_btl_openib_endpoint_t* xoob_find_endpoint(orca_process_name_t* process_name,
|
||||
uint64_t subnet_id, uint16_t lid, uint8_t message_type)
|
||||
{
|
||||
size_t i;
|
||||
@ -711,7 +708,7 @@ static mca_btl_openib_endpoint_t* xoob_find_endpoint(orte_process_name_t* proces
|
||||
ib_proc != (mca_btl_openib_proc_t*)
|
||||
opal_list_get_end(&mca_btl_openib_component.ib_procs);
|
||||
ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
if (OPAL_EQUAL == orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&ib_proc->proc_ompi->proc_name, process_name)) {
|
||||
found = true;
|
||||
break;
|
||||
@ -822,8 +819,8 @@ static void free_rem_info(mca_btl_openib_rem_info_t *rem_info)
|
||||
* and if this endpoint is trying to connect, reply with our QP info,
|
||||
* otherwise try to modify QP's and establish reliable connection
|
||||
*/
|
||||
static void xoob_rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
static void xoob_rml_recv_cb(int status, orca_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc;
|
||||
@ -1028,12 +1025,12 @@ static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
|
||||
ensure to only post it *once*, because another btl may have
|
||||
come in before this and already posted it. */
|
||||
if (!rml_recv_posted) {
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
OMPI_RML_TAG_XOPENIB,
|
||||
ORTE_RML_PERSISTENT,
|
||||
xoob_rml_recv_cb,
|
||||
NULL);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
rc = orca_oob_recv_buffer_nb(ORCA_NAME_WILDCARD,
|
||||
OMPI_RML_TAG_XOPENIB,
|
||||
ORCA_RML_PERSISTENT,
|
||||
xoob_rml_recv_cb,
|
||||
NULL);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: xoob CPC system error %d (%s)",
|
||||
rc, opal_strerror(rc));
|
||||
@ -1141,7 +1138,7 @@ static int xoob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
|
||||
static int xoob_component_finalize(void)
|
||||
{
|
||||
if (rml_recv_posted) {
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_XOPENIB);
|
||||
orca_oob_recv_cancel(ORCA_NAME_WILDCARD, OMPI_RML_TAG_XOPENIB);
|
||||
rml_recv_posted = false;
|
||||
}
|
||||
#if (ENABLE_DYNAMIC_SL)
|
||||
|
@ -11,6 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -942,7 +943,7 @@ void mca_btl_sctp_component_accept(void)
|
||||
*/
|
||||
static void mca_btl_sctp_component_recv_handler(int sd, short flags, void* user)
|
||||
{
|
||||
orte_process_name_t guid;
|
||||
orca_process_name_t guid;
|
||||
struct sockaddr_in addr;
|
||||
int retval;
|
||||
mca_btl_sctp_proc_t* btl_proc;
|
||||
@ -966,7 +967,7 @@ static void mca_btl_sctp_component_recv_handler(int sd, short flags, void* user)
|
||||
}
|
||||
SCTP_BTL_ERROR(("mca_btl_sctp_component_recv_handler() sd=%d, got %d byte guid.\n", sd, retval));
|
||||
|
||||
ORTE_PROCESS_NAME_NTOH(guid);
|
||||
ORCA_PROCESS_NAME_NTOH(guid);
|
||||
|
||||
/* lookup the corresponding process */
|
||||
btl_proc = mca_btl_sctp_proc_lookup(&guid);
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -59,7 +60,8 @@
|
||||
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "btl_sctp.h"
|
||||
#include "btl_sctp_endpoint.h"
|
||||
#include "btl_sctp_proc.h"
|
||||
@ -368,7 +370,7 @@ int mca_btl_sctp_endpoint_send(mca_btl_base_endpoint_t* btl_endpoint, mca_btl_sc
|
||||
int rc = OMPI_SUCCESS;
|
||||
|
||||
/* What if there are multiple procs on this endpoint? Possible? */
|
||||
orte_vpid_t vpid = btl_endpoint->endpoint_proc->proc_ompi->proc_name.vpid;
|
||||
orca_vpid_t vpid = btl_endpoint->endpoint_proc->proc_ompi->proc_name.vpid;
|
||||
OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
|
||||
|
||||
if((mca_btl_sctp_proc_check_vpid(vpid, sender_proc_table)) == INVALID_ENTRY) {
|
||||
@ -545,9 +547,9 @@ static int mca_btl_sctp_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_e
|
||||
{
|
||||
/* send process identifier to remote endpoint */
|
||||
mca_btl_sctp_proc_t* btl_proc = mca_btl_sctp_proc_local();
|
||||
orte_process_name_t guid = btl_proc->proc_ompi->proc_name;
|
||||
orca_process_name_t guid = btl_proc->proc_ompi->proc_name;
|
||||
|
||||
ORTE_PROCESS_NAME_HTON(guid);
|
||||
ORCA_PROCESS_NAME_HTON(guid);
|
||||
if(mca_btl_sctp_endpoint_send_blocking(btl_endpoint, &guid, sizeof(guid)) !=
|
||||
sizeof(guid)) {
|
||||
return OMPI_ERR_UNREACH;
|
||||
@ -578,7 +580,7 @@ bool mca_btl_sctp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct
|
||||
btl_addr->addr_inet.s_addr == addr->sin_addr.s_addr)
|
||||
{
|
||||
mca_btl_sctp_proc_t *endpoint_proc = btl_endpoint->endpoint_proc;
|
||||
cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
cmpval = orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&endpoint_proc->proc_ompi->proc_name,
|
||||
&this_proc->proc_ompi->proc_name);
|
||||
if((btl_endpoint->endpoint_sd < 0) ||
|
||||
@ -828,19 +830,19 @@ static int mca_btl_sctp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endp
|
||||
|
||||
static int mca_btl_sctp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
|
||||
{
|
||||
orte_process_name_t guid;
|
||||
orca_process_name_t guid;
|
||||
mca_btl_sctp_proc_t* btl_proc = btl_endpoint->endpoint_proc;
|
||||
|
||||
if((mca_btl_sctp_endpoint_recv_blocking(btl_endpoint, &guid,
|
||||
sizeof(orte_process_name_t))) != sizeof(orte_process_name_t)) {
|
||||
sizeof(orca_process_name_t))) != sizeof(orca_process_name_t)) {
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
ORTE_PROCESS_NAME_NTOH(guid);
|
||||
ORCA_PROCESS_NAME_NTOH(guid);
|
||||
|
||||
/* compare this to the expected values */
|
||||
if(memcmp(&btl_proc->proc_ompi->proc_name, &guid, sizeof(orte_process_name_t)) != 0) {
|
||||
if(memcmp(&btl_proc->proc_ompi->proc_name, &guid, sizeof(orca_process_name_t)) != 0) {
|
||||
BTL_ERROR(("received unexpected process identifier %s",
|
||||
ORTE_NAME_PRINT(&guid)));
|
||||
ORCA_NAME_PRINT(&guid)));
|
||||
mca_btl_sctp_endpoint_close(btl_endpoint);
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
@ -1198,7 +1200,7 @@ static void mca_btl_sctp_endpoint_send_handler(int sd, short flags, void* user)
|
||||
/* 1 to many */
|
||||
mca_btl_sctp_endpoint_t* btl_endpoint = (mca_btl_sctp_endpoint_t *)user;
|
||||
our_sctp_endpoint *current_our_endpoint = NULL;
|
||||
orte_vpid_t vpid;
|
||||
orca_vpid_t vpid;
|
||||
send_handler_1_to_many_different_endpoint:
|
||||
vpid = btl_endpoint->endpoint_proc->proc_ompi->proc_name.vpid;
|
||||
OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -67,7 +68,7 @@ void mca_btl_sctp_proc_destruct(mca_btl_sctp_proc_t* stcp_proc)
|
||||
/* remove from list of all proc instances */
|
||||
OPAL_THREAD_LOCK(&mca_btl_sctp_component.sctp_lock);
|
||||
opal_hash_table_remove_value_uint64(&mca_btl_sctp_component.sctp_procs,
|
||||
orte_util_hash_name(&stcp_proc->proc_ompi->proc_name));
|
||||
orca_process_info_hash_name(&stcp_proc->proc_ompi->proc_name));
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);
|
||||
|
||||
/* release resources */
|
||||
@ -113,7 +114,7 @@ mca_btl_sctp_proc_t* mca_btl_sctp_proc_create(ompi_proc_t* ompi_proc)
|
||||
int rc;
|
||||
size_t size;
|
||||
mca_btl_sctp_proc_t* btl_proc;
|
||||
uint64_t hash = orte_util_hash_name(&ompi_proc->proc_name);
|
||||
uint64_t hash = orca_process_info_hash_name(&ompi_proc->proc_name);
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_btl_sctp_component.sctp_lock);
|
||||
rc = opal_hash_table_get_value_uint64(&mca_btl_sctp_component.sctp_procs,
|
||||
@ -334,12 +335,12 @@ int mca_btl_sctp_proc_remove(mca_btl_sctp_proc_t* btl_proc, mca_btl_base_endpoin
|
||||
* Look for an existing SCTP process instance based on the globally unique
|
||||
* process identifier.
|
||||
*/
|
||||
mca_btl_sctp_proc_t* mca_btl_sctp_proc_lookup(const orte_process_name_t *name)
|
||||
mca_btl_sctp_proc_t* mca_btl_sctp_proc_lookup(const orca_process_name_t *name)
|
||||
{
|
||||
mca_btl_sctp_proc_t* proc = NULL;
|
||||
OPAL_THREAD_LOCK(&mca_btl_sctp_component.sctp_lock);
|
||||
opal_hash_table_get_value_uint64(&mca_btl_sctp_component.sctp_procs,
|
||||
orte_util_hash_name(name), (void**)&proc);
|
||||
orca_process_info_hash_name(name), (void**)&proc);
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_sctp_component.sctp_lock);
|
||||
return proc;
|
||||
}
|
||||
@ -373,11 +374,11 @@ bool mca_btl_sctp_proc_accept(mca_btl_sctp_proc_t* btl_proc, struct sockaddr_in*
|
||||
*
|
||||
* TODO - change this to use a hash for constant time performance
|
||||
*/
|
||||
static int mca_btl_sctp_proc_check(int is_vpid, sctp_assoc_t id, orte_vpid_t vpid, struct mca_btl_sctp_proc_table_node *table) {
|
||||
static int mca_btl_sctp_proc_check(int is_vpid, sctp_assoc_t id, orca_vpid_t vpid, struct mca_btl_sctp_proc_table_node *table) {
|
||||
#if MCA_BTL_SCTP_DONT_USE_HASH
|
||||
int i;
|
||||
for(i = 0; i < MCA_BTL_SCTP_PROC_TABLE_SIZE; i++) {
|
||||
/* sender_proc_table uses orte_vpid_t.
|
||||
/* sender_proc_table uses orca_vpid_t.
|
||||
* recvr_proc_table uses sctp_assoc_id.
|
||||
* Calls using this function use one or the other.
|
||||
*/
|
||||
@ -403,7 +404,7 @@ static int mca_btl_sctp_proc_check(int is_vpid, sctp_assoc_t id, orte_vpid_t vpi
|
||||
#endif
|
||||
}
|
||||
|
||||
int mca_btl_sctp_proc_check_vpid(orte_vpid_t vpid, struct mca_btl_sctp_proc_table_node *table) {
|
||||
int mca_btl_sctp_proc_check_vpid(orca_vpid_t vpid, struct mca_btl_sctp_proc_table_node *table) {
|
||||
return mca_btl_sctp_proc_check(1, 0, vpid, table);
|
||||
}
|
||||
|
||||
@ -421,7 +422,7 @@ int mca_btl_sctp_proc_check_assoc_id(sctp_assoc_t id, struct mca_btl_sctp_proc_t
|
||||
* TODO change this to a hash table that can expand to eliminate
|
||||
* MCA_BTL_SCTP_PROC_TABLE_SIZE limitation
|
||||
*/
|
||||
static void mca_btl_sctp_proc_add(sctp_assoc_t id, orte_vpid_t vpid, struct mca_btl_sctp_proc_t *proc, struct mca_btl_sctp_proc_table_node *table) {
|
||||
static void mca_btl_sctp_proc_add(sctp_assoc_t id, orca_vpid_t vpid, struct mca_btl_sctp_proc_t *proc, struct mca_btl_sctp_proc_table_node *table) {
|
||||
#if MCA_BTL_SCTP_DONT_USE_HASH
|
||||
int i;
|
||||
for(i = 0; i < MCA_BTL_SCTP_PROC_TABLE_SIZE; i++) {
|
||||
@ -440,7 +441,7 @@ static void mca_btl_sctp_proc_add(sctp_assoc_t id, orte_vpid_t vpid, struct mca_
|
||||
#endif
|
||||
}
|
||||
|
||||
void mca_btl_sctp_proc_add_vpid(orte_vpid_t vpid, struct mca_btl_sctp_proc_t *proc, struct mca_btl_sctp_proc_table_node *table) {
|
||||
void mca_btl_sctp_proc_add_vpid(orca_vpid_t vpid, struct mca_btl_sctp_proc_t *proc, struct mca_btl_sctp_proc_table_node *table) {
|
||||
mca_btl_sctp_proc_add(0, vpid, proc, table);
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -60,7 +61,7 @@ typedef struct mca_btl_sctp_proc_t mca_btl_sctp_proc_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_sctp_proc_t);
|
||||
|
||||
mca_btl_sctp_proc_t* mca_btl_sctp_proc_create(ompi_proc_t* ompi_proc);
|
||||
mca_btl_sctp_proc_t* mca_btl_sctp_proc_lookup(const orte_process_name_t* name);
|
||||
mca_btl_sctp_proc_t* mca_btl_sctp_proc_lookup(const orca_process_name_t* name);
|
||||
int mca_btl_sctp_proc_insert(mca_btl_sctp_proc_t*, mca_btl_base_endpoint_t*);
|
||||
int mca_btl_sctp_proc_remove(mca_btl_sctp_proc_t*, mca_btl_base_endpoint_t*);
|
||||
bool mca_btl_sctp_proc_accept(mca_btl_sctp_proc_t*, struct sockaddr_in*, int);
|
||||
@ -90,7 +91,7 @@ enum {
|
||||
struct mca_btl_sctp_proc_table_node {
|
||||
int valid;
|
||||
sctp_assoc_t sctp_assoc_id;
|
||||
orte_vpid_t vpid;
|
||||
orca_vpid_t vpid;
|
||||
struct mca_btl_sctp_proc_t *proc;
|
||||
};
|
||||
typedef struct mca_btl_sctp_proc_table_node mca_btl_sctp_proc_table_node;
|
||||
@ -98,9 +99,9 @@ typedef struct mca_btl_sctp_proc_table_node mca_btl_sctp_proc_table_node;
|
||||
extern struct mca_btl_sctp_proc_table_node *recvr_proc_table;
|
||||
extern struct mca_btl_sctp_proc_table_node *sender_proc_table;
|
||||
|
||||
int mca_btl_sctp_proc_check_vpid(orte_vpid_t vpid, struct mca_btl_sctp_proc_table_node *table);
|
||||
int mca_btl_sctp_proc_check_vpid(orca_vpid_t vpid, struct mca_btl_sctp_proc_table_node *table);
|
||||
int mca_btl_sctp_proc_check_assoc_id(sctp_assoc_t id, struct mca_btl_sctp_proc_table_node *table);
|
||||
void mca_btl_sctp_proc_add_vpid(orte_vpid_t vpid, struct mca_btl_sctp_proc_t *proc, struct mca_btl_sctp_proc_table_node *table);
|
||||
void mca_btl_sctp_proc_add_vpid(orca_vpid_t vpid, struct mca_btl_sctp_proc_t *proc, struct mca_btl_sctp_proc_table_node *table);
|
||||
void mca_btl_sctp_proc_add_assoc_id(sctp_assoc_t id, struct mca_btl_sctp_proc_t *proc, struct mca_btl_sctp_proc_table_node *table);
|
||||
mca_btl_sctp_proc_t *mca_btl_sctp_proc_get(sctp_assoc_t id, struct mca_btl_sctp_proc_table_node *table);
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -97,7 +98,7 @@ void mca_btl_sctp_recv_handler(int sd, short flags, void *user) {
|
||||
/* allocated this elsewhere only once per BTL to avoid repeatedly calling malloc */
|
||||
char *buf = sctp_recv_buf;
|
||||
|
||||
orte_process_name_t guid;
|
||||
orca_process_name_t guid;
|
||||
struct sockaddr_in their_addr;
|
||||
int retval;
|
||||
mca_btl_sctp_proc_t *btl_proc;
|
||||
@ -214,7 +215,7 @@ data_still_pending_on_endpoint:
|
||||
|
||||
/* Setup guid. */
|
||||
memcpy(&guid, buf, retval);
|
||||
ORTE_PROCESS_NAME_NTOH(guid);
|
||||
ORCA_PROCESS_NAME_NTOH(guid);
|
||||
|
||||
/* lookup the corresponding process */
|
||||
btl_proc = mca_btl_sctp_proc_lookup(&guid);
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -42,7 +43,8 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/printf.h"
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
@ -252,8 +254,8 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
|
||||
|
||||
/* set file name */
|
||||
if (asprintf(&sm_ctl_file, "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s",
|
||||
orte_process_info.job_session_dir,
|
||||
orte_process_info.nodename) < 0) {
|
||||
orca_process_info_get_job_session_dir(),
|
||||
orca_process_info_get_nodename() ) < 0) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -387,7 +389,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
|
||||
OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t);
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||
sprintf(path, "%s"OPAL_PATH_SEP"sm_fifo.%lu",
|
||||
orte_process_info.job_session_dir,
|
||||
orca_process_info_get_job_session_dir(),
|
||||
(unsigned long)proc->proc_name.vpid);
|
||||
ep->fifo_fd = open(path, O_WRONLY);
|
||||
if(ep->fifo_fd < 0) {
|
||||
@ -1161,7 +1163,7 @@ int mca_btl_sm_ft_event(int state) {
|
||||
}
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
if( orte_cr_continue_like_restart ) {
|
||||
if( orca_info_cr_continue_like_restart() ) {
|
||||
if( NULL != mca_btl_sm_component.sm_seg ) {
|
||||
/* Add shared memory file */
|
||||
opal_crs_base_cleanup_append(mca_btl_sm_component.sm_seg->shmem_ds.seg_name, false);
|
||||
|
@ -15,6 +15,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -46,9 +47,7 @@
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
@ -152,9 +151,9 @@ static int sm_register(void)
|
||||
mca_btl_sm_component.use_knem = i;
|
||||
} else {
|
||||
if (i > 0) {
|
||||
orte_show_help("help-mpi-btl-sm.txt",
|
||||
"knem requested but not supported", true,
|
||||
orte_process_info.nodename);
|
||||
orca_show_help("help-mpi-btl-sm.txt",
|
||||
"knem requested but not supported", true,
|
||||
orca_process_info_get_nodename());
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
mca_btl_sm_component.use_knem = 0;
|
||||
@ -367,7 +366,7 @@ static mca_btl_base_module_t** mca_btl_sm_component_init(
|
||||
*num_btls = 0;
|
||||
|
||||
/* if no session directory was created, then we cannot be used */
|
||||
if (!orte_create_session_dirs) {
|
||||
if (!orca_process_info_create_session_dirs()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -378,8 +377,8 @@ static mca_btl_base_module_t** mca_btl_sm_component_init(
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||
/* create a named pipe to receive events */
|
||||
sprintf( mca_btl_sm_component.sm_fifo_path,
|
||||
"%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir,
|
||||
(unsigned long)ORTE_PROC_MY_NAME->vpid );
|
||||
"%s"OPAL_PATH_SEP"sm_fifo.%lu", orca_process_info_get_job_session_dir(),
|
||||
(unsigned long)orca_process_info_get_vpid(ORCA_PROC_MY_NAME) );
|
||||
if(mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) {
|
||||
opal_output(0, "mca_btl_sm_component_init: mkfifo failed with errno=%d\n",errno);
|
||||
return NULL;
|
||||
@ -442,11 +441,11 @@ static mca_btl_base_module_t** mca_btl_sm_component_init(
|
||||
if (0 != stat("/dev/knem", &sbuf)) {
|
||||
sbuf.st_mode = 0;
|
||||
}
|
||||
orte_show_help("help-mpi-btl-sm.txt", "knem permission denied",
|
||||
true, orte_process_info.nodename, sbuf.st_mode);
|
||||
orca_show_help("help-mpi-btl-sm.txt", "knem permission denied",
|
||||
true, orca_process_info_get_nodename(), sbuf.st_mode);
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-sm.txt", "knem fail open",
|
||||
true, orte_process_info.nodename, errno,
|
||||
orca_show_help("help-mpi-btl-sm.txt", "knem fail open",
|
||||
true, orca_process_info_get_nodename(), errno,
|
||||
strerror(errno));
|
||||
}
|
||||
goto no_knem;
|
||||
@ -457,14 +456,14 @@ static mca_btl_base_module_t** mca_btl_sm_component_init(
|
||||
rc = ioctl(mca_btl_sm.knem_fd, KNEM_CMD_GET_INFO,
|
||||
&mca_btl_sm_component.knem_info);
|
||||
if (rc < 0) {
|
||||
orte_show_help("help-mpi-btl-sm.txt", "knem get ABI fail",
|
||||
true, orte_process_info.nodename, errno,
|
||||
orca_show_help("help-mpi-btl-sm.txt", "knem get ABI fail",
|
||||
true, orca_process_info_get_nodename(), errno,
|
||||
strerror(errno));
|
||||
goto no_knem;
|
||||
}
|
||||
if (KNEM_ABI_VERSION != mca_btl_sm_component.knem_info.abi) {
|
||||
orte_show_help("help-mpi-btl-sm.txt", "knem ABI mismatch",
|
||||
true, orte_process_info.nodename, KNEM_ABI_VERSION,
|
||||
orca_show_help("help-mpi-btl-sm.txt", "knem ABI mismatch",
|
||||
true, orca_process_info_get_nodename(), KNEM_ABI_VERSION,
|
||||
mca_btl_sm_component.knem_info.abi);
|
||||
goto no_knem;
|
||||
}
|
||||
@ -485,8 +484,8 @@ static mca_btl_base_module_t** mca_btl_sm_component_init(
|
||||
MAP_SHARED, mca_btl_sm.knem_fd,
|
||||
KNEM_STATUS_ARRAY_FILE_OFFSET);
|
||||
if (MAP_FAILED == mca_btl_sm.knem_status_array) {
|
||||
orte_show_help("help-mpi-btl-sm.txt", "knem mmap fail",
|
||||
true, orte_process_info.nodename, errno,
|
||||
orca_show_help("help-mpi-btl-sm.txt", "knem mmap fail",
|
||||
true, orca_process_info_get_nodename(), errno,
|
||||
strerror(errno));
|
||||
goto no_knem;
|
||||
}
|
||||
@ -497,8 +496,8 @@ static mca_btl_base_module_t** mca_btl_sm_component_init(
|
||||
malloc(sizeof(mca_btl_sm_frag_t *) *
|
||||
mca_btl_sm_component.knem_max_simultaneous);
|
||||
if (NULL == mca_btl_sm.knem_frag_array) {
|
||||
orte_show_help("help-mpi-btl-sm.txt", "knem init fail",
|
||||
true, orte_process_info.nodename, "malloc",
|
||||
orca_show_help("help-mpi-btl-sm.txt", "knem init fail",
|
||||
true, orca_process_info_get_nodename(), "malloc",
|
||||
errno, strerror(errno));
|
||||
goto no_knem;
|
||||
}
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -38,7 +39,9 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/printf.h"
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
@ -265,8 +268,8 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int n)
|
||||
|
||||
/* set file name */
|
||||
if (asprintf(&sm_ctl_file, "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s",
|
||||
orte_process_info.job_session_dir,
|
||||
orte_process_info.nodename) < 0) {
|
||||
orca_process_info_get_job_session_dir(),
|
||||
orca_process_info_get_nodename()) < 0) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -400,7 +403,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
|
||||
OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t);
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||
sprintf(path, "%s"OPAL_PATH_SEP"sm_fifo.%lu",
|
||||
orte_process_info.job_session_dir,
|
||||
orca_process_info_get_job_session_dir(),
|
||||
(unsigned long)proc->proc_name.vpid);
|
||||
ep->fifo_fd = open(path, O_WRONLY);
|
||||
if(ep->fifo_fd < 0) {
|
||||
@ -1075,7 +1078,7 @@ int mca_btl_smcuda_ft_event(int state) {
|
||||
}
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
if( orte_cr_continue_like_restart ) {
|
||||
if( orca_info_cr_continue_like_restart() ) {
|
||||
if( NULL != mca_btl_smcuda_component.sm_seg ) {
|
||||
/* Add shared memory file */
|
||||
opal_crs_base_cleanup_append(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name, false);
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2010-2011 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -45,9 +46,8 @@
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
@ -297,7 +297,7 @@ static mca_btl_base_module_t** mca_btl_smcuda_component_init(
|
||||
*num_btls = 0;
|
||||
|
||||
/* if no session directory was created, then we cannot be used */
|
||||
if (!orte_create_session_dirs) {
|
||||
if (!orca_process_info_create_session_dirs()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -308,8 +308,8 @@ static mca_btl_base_module_t** mca_btl_smcuda_component_init(
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||
/* create a named pipe to receive events */
|
||||
sprintf( mca_btl_smcuda_component.sm_fifo_path,
|
||||
"%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir,
|
||||
(unsigned long)ORTE_PROC_MY_NAME->vpid );
|
||||
"%s"OPAL_PATH_SEP"sm_fifo.%lu", orca_process_info_get_job_session_dir(),
|
||||
(unsigned long)orca_process_info_get_vpid(ORCA_PROC_MY_NAME) );
|
||||
if(mkfifo(mca_btl_smcuda_component.sm_fifo_path, 0660) < 0) {
|
||||
opal_output(0, "mca_btl_smcuda_component_init: mkfifo failed with errno=%d\n",errno);
|
||||
return NULL;
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Laboratory
|
||||
* Copyright (c) 2009-2012 Oak Ridge National Laboratory
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -57,9 +57,7 @@
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/types.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
@ -215,8 +213,8 @@ static int mca_btl_tcp_component_register(void)
|
||||
mca_btl_tcp_param_register_int( "port_min_v4",
|
||||
"The minimum port where the TCP BTL will try to bind (default 1024)", 1024 );
|
||||
if( mca_btl_tcp_component.tcp_port_min > USHRT_MAX ) {
|
||||
orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
|
||||
true, "v4", orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
|
||||
true, "v4", orca_process_info_get_nodename(),
|
||||
mca_btl_tcp_component.tcp_port_min );
|
||||
mca_btl_tcp_component.tcp_port_min = 1024;
|
||||
}
|
||||
@ -234,8 +232,8 @@ static int mca_btl_tcp_component_register(void)
|
||||
mca_btl_tcp_param_register_int( "port_min_v6",
|
||||
"The minimum port where the TCP BTL will try to bind (default 1024)", 1024 );
|
||||
if( mca_btl_tcp_component.tcp6_port_min > USHRT_MAX ) {
|
||||
orte_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
|
||||
true, "v6", orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
|
||||
true, "v6", orca_process_info_get_nodename(),
|
||||
mca_btl_tcp_component.tcp6_port_min );
|
||||
mca_btl_tcp_component.tcp6_port_min = 1024;
|
||||
}
|
||||
@ -281,10 +279,10 @@ static int mca_btl_tcp_component_register(void)
|
||||
|
||||
if (NULL != argv && '\0' != *(argv[0])) {
|
||||
int if_index, rc, count;
|
||||
orte_node_rank_t node_rank;
|
||||
orca_node_rank_t node_rank;
|
||||
char name[256];
|
||||
|
||||
node_rank = orte_ess.get_node_rank(ORTE_PROC_MY_NAME);
|
||||
node_rank = orca_node_info_get_rank(ORCA_PROC_MY_NAME);
|
||||
|
||||
/* Now that we've got that local rank, take the
|
||||
corresponding entry from the tcp_if_seq list (wrapping
|
||||
@ -306,10 +304,10 @@ static int mca_btl_tcp_component_register(void)
|
||||
}
|
||||
}
|
||||
if (if_index < 0) {
|
||||
orte_show_help("help-mpi-btl-tcp.txt",
|
||||
orca_show_help("help-mpi-btl-tcp.txt",
|
||||
"invalid if_inexclude",
|
||||
true, "if_seq",
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
mca_btl_tcp_component.tcp_if_seq,
|
||||
"Interface does not exist");
|
||||
return OMPI_ERR_BAD_PARAM;
|
||||
@ -520,8 +518,8 @@ static char **split_and_resolve(char **orig_str, char *name)
|
||||
tmp = strdup(argv[i]);
|
||||
str = strchr(argv[i], '/');
|
||||
if (NULL == str) {
|
||||
orte_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||
true, name, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||
true, name, orca_process_info_get_nodename(),
|
||||
tmp, "Invalid specification (missing \"/\")");
|
||||
free(argv[i]);
|
||||
free(tmp);
|
||||
@ -537,8 +535,8 @@ static char **split_and_resolve(char **orig_str, char *name)
|
||||
free(argv[i]);
|
||||
|
||||
if (1 != ret) {
|
||||
orte_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||
true, name, orte_process_info.nodename, tmp,
|
||||
orca_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||
true, name, orca_process_info_get_nodename(), tmp,
|
||||
"Invalid specification (inet_pton() failed)");
|
||||
free(tmp);
|
||||
continue;
|
||||
@ -564,8 +562,8 @@ static char **split_and_resolve(char **orig_str, char *name)
|
||||
|
||||
/* If we didn't find a match, keep trying */
|
||||
if (if_index < 0) {
|
||||
orte_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||
true, name, orte_process_info.nodename, tmp,
|
||||
orca_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||
true, name, orca_process_info_get_nodename(), tmp,
|
||||
"Did not find interface matching this subnet");
|
||||
free(tmp);
|
||||
continue;
|
||||
@ -1128,7 +1126,7 @@ static void mca_btl_tcp_component_accept_handler( int incoming_sd,
|
||||
*/
|
||||
static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
||||
{
|
||||
orte_process_name_t guid;
|
||||
orca_process_name_t guid;
|
||||
struct sockaddr_storage addr;
|
||||
int retval;
|
||||
mca_btl_tcp_proc_t* btl_proc;
|
||||
@ -1143,7 +1141,7 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return;
|
||||
}
|
||||
ORTE_PROCESS_NAME_NTOH(guid);
|
||||
ORCA_PROCESS_NAME_NTOH(guid);
|
||||
|
||||
/* now set socket up to be non-blocking */
|
||||
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -315,9 +316,9 @@ static int mca_btl_tcp_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_en
|
||||
{
|
||||
/* send process identifier to remote endpoint */
|
||||
mca_btl_tcp_proc_t* btl_proc = mca_btl_tcp_proc_local();
|
||||
orte_process_name_t guid = btl_proc->proc_ompi->proc_name;
|
||||
orca_process_name_t guid = btl_proc->proc_ompi->proc_name;
|
||||
|
||||
ORTE_PROCESS_NAME_HTON(guid);
|
||||
ORCA_PROCESS_NAME_HTON(guid);
|
||||
if(mca_btl_tcp_endpoint_send_blocking(btl_endpoint, &guid, sizeof(guid)) !=
|
||||
sizeof(guid)) {
|
||||
return OMPI_ERR_UNREACH;
|
||||
@ -350,7 +351,7 @@ bool mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint,
|
||||
return false;
|
||||
}
|
||||
|
||||
cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
cmpval = orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&endpoint_proc->proc_ompi->proc_name,
|
||||
&this_proc->proc_ompi->proc_name);
|
||||
if((btl_endpoint->endpoint_sd < 0) ||
|
||||
@ -471,19 +472,19 @@ static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpo
|
||||
*/
|
||||
static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
|
||||
{
|
||||
orte_process_name_t guid;
|
||||
orca_process_name_t guid;
|
||||
mca_btl_tcp_proc_t* btl_proc = btl_endpoint->endpoint_proc;
|
||||
|
||||
if((mca_btl_tcp_endpoint_recv_blocking(btl_endpoint, &guid, sizeof(orte_process_name_t))) != sizeof(orte_process_name_t)) {
|
||||
if((mca_btl_tcp_endpoint_recv_blocking(btl_endpoint, &guid, sizeof(orca_process_name_t))) != sizeof(orca_process_name_t)) {
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
ORTE_PROCESS_NAME_NTOH(guid);
|
||||
ORCA_PROCESS_NAME_NTOH(guid);
|
||||
/* compare this to the expected values */
|
||||
if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
if (OPAL_EQUAL != orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&btl_proc->proc_ompi->proc_name,
|
||||
&guid)) {
|
||||
BTL_ERROR(("received unexpected process identifier %s",
|
||||
ORTE_NAME_PRINT(&guid)));
|
||||
ORCA_NAME_PRINT(&guid)));
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
@ -570,7 +571,7 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo
|
||||
|
||||
opal_output_verbose(20, mca_btl_base_output,
|
||||
"btl: tcp: attempting to connect() to %s address %s on port %d",
|
||||
ORTE_NAME_PRINT(&btl_endpoint->endpoint_proc->proc_ompi->proc_name),
|
||||
ORCA_NAME_PRINT(&btl_endpoint->endpoint_proc->proc_ompi->proc_name),
|
||||
opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
|
||||
ntohs(btl_endpoint->endpoint_addr->addr_port));
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -76,7 +77,7 @@ void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* tcp_proc)
|
||||
/* remove from list of all proc instances */
|
||||
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
||||
opal_hash_table_remove_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
||||
orte_util_hash_name(&tcp_proc->proc_ompi->proc_name));
|
||||
orca_process_info_hash_name(&tcp_proc->proc_ompi->proc_name));
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
||||
|
||||
/* release resources */
|
||||
@ -99,7 +100,7 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
|
||||
int rc;
|
||||
size_t size;
|
||||
mca_btl_tcp_proc_t* btl_proc;
|
||||
uint64_t hash = orte_util_hash_name(&ompi_proc->proc_name);
|
||||
uint64_t hash = orca_process_info_hash_name(&ompi_proc->proc_name);
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
||||
rc = opal_hash_table_get_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
||||
@ -706,12 +707,12 @@ int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_
|
||||
* Look for an existing TCP process instance based on the globally unique
|
||||
* process identifier.
|
||||
*/
|
||||
mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const orte_process_name_t *name)
|
||||
mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const orca_process_name_t *name)
|
||||
{
|
||||
mca_btl_tcp_proc_t* proc = NULL;
|
||||
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
||||
opal_hash_table_get_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
||||
orte_util_hash_name(name), (void**)&proc);
|
||||
orca_process_info_hash_name(name), (void**)&proc);
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
||||
return proc;
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -22,7 +23,7 @@
|
||||
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "orte/types.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "btl_tcp.h"
|
||||
#include "btl_tcp_addr.h"
|
||||
#include "btl_tcp_endpoint.h"
|
||||
@ -106,7 +107,7 @@ enum mca_btl_tcp_connection_quality {
|
||||
|
||||
|
||||
mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc);
|
||||
mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const orte_process_name_t* name);
|
||||
mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const orca_process_name_t* name);
|
||||
int mca_btl_tcp_proc_insert(mca_btl_tcp_proc_t*, mca_btl_base_endpoint_t*);
|
||||
int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t*, mca_btl_base_endpoint_t*);
|
||||
bool mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t*, struct sockaddr*, int);
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2006 Sandia National Laboratories. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -812,7 +812,7 @@ static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
|
||||
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt", "interface not found",
|
||||
true, orte_process_info.nodename, btl_addr_string));
|
||||
true, orca_process_info_get_nodename(), btl_addr_string));
|
||||
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -826,7 +826,7 @@ static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
|
||||
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt", "netmask not found",
|
||||
true, orte_process_info.nodename, btl_addr_string));
|
||||
true, orca_process_info_get_nodename(), btl_addr_string));
|
||||
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -840,7 +840,7 @@ static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
|
||||
/* current uDAPL BTL does not support IPv6 */
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt", "IPv4 only",
|
||||
true, orte_process_info.nodename));
|
||||
true, orca_process_info_get_nodename()));
|
||||
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -37,7 +37,9 @@
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "btl_udapl_endpoint.h"
|
||||
@ -232,7 +234,7 @@ do { \
|
||||
#define BTL_UDAPL_VERBOSE_HELP(verbose_level, args) \
|
||||
do { \
|
||||
if (verbose_level <= mca_btl_udapl_component.udapl_verbosity) { \
|
||||
orte_show_help args; \
|
||||
orca_show_help args; \
|
||||
} \
|
||||
} while(0);
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -39,7 +39,9 @@
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "btl_udapl_endpoint.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
#include "ompi/runtime/mpiruntime.h"
|
||||
|
||||
@ -417,7 +419,7 @@ static int mca_btl_udapl_modify_ia_list(DAT_COUNT *num_info_entries,
|
||||
char *str = opal_argv_join(mca_btl_udapl_component.if_list, ',');
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt", "nonexistent entry",
|
||||
true, orte_process_info.nodename,
|
||||
true, orca_process_info_get_nodename(),
|
||||
((NULL != mca_btl_udapl_component.if_include) ?
|
||||
"in" : "ex"), str));
|
||||
free(str);
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2006 Sandia National Laboratories. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -28,8 +28,8 @@
|
||||
#include "ompi/types.h"
|
||||
#include "opal/align.h"
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
|
||||
@ -46,9 +46,9 @@
|
||||
|
||||
static void mca_btl_udapl_endpoint_send_cb(
|
||||
int status,
|
||||
orte_process_name_t* endpoint,
|
||||
orca_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orca_oob_tag_t tag,
|
||||
void* cbdata);
|
||||
static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint);
|
||||
static int mca_btl_udapl_endpoint_post_recv(
|
||||
@ -57,9 +57,9 @@ static int mca_btl_udapl_endpoint_post_recv(
|
||||
void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint);
|
||||
void mca_btl_udapl_endpoint_recv(
|
||||
int status,
|
||||
orte_process_name_t* endpoint,
|
||||
orca_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orca_oob_tag_t tag,
|
||||
void* cbdata);
|
||||
static int mca_btl_udapl_endpoint_finish_eager(mca_btl_udapl_endpoint_t*);
|
||||
static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t*);
|
||||
@ -322,8 +322,8 @@ int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint,
|
||||
}
|
||||
|
||||
|
||||
static void mca_btl_udapl_endpoint_send_cb(int status, orte_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata)
|
||||
static void mca_btl_udapl_endpoint_send_cb(int status, orca_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag, void* cbdata)
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
}
|
||||
@ -523,7 +523,7 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
|
||||
int rc;
|
||||
|
||||
if(NULL == buf) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_OUT_OF_RESOURCE);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -532,21 +532,21 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
|
||||
/* Pack our address information */
|
||||
rc = opal_dss.pack(buf, &addr->port, 1, OPAL_UINT64);
|
||||
if(OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = opal_dss.pack(buf, &addr->addr, sizeof(DAT_SOCK_ADDR), OPAL_UINT8);
|
||||
if(OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Send the buffer */
|
||||
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name, buf,
|
||||
rc = orca_oob_send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name, buf,
|
||||
OMPI_RML_TAG_UDAPL, 0, mca_btl_udapl_endpoint_send_cb, NULL);
|
||||
if(0 > rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -555,8 +555,8 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
|
||||
}
|
||||
|
||||
|
||||
void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata)
|
||||
void mca_btl_udapl_endpoint_recv(int status, orca_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag, void* cbdata)
|
||||
{
|
||||
mca_btl_udapl_addr_t addr;
|
||||
mca_btl_udapl_proc_t* proc;
|
||||
@ -568,14 +568,14 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
|
||||
/* Unpack data */
|
||||
rc = opal_dss.unpack(buffer, &addr.port, &cnt, OPAL_UINT64);
|
||||
if(OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
cnt = sizeof(mca_btl_udapl_addr_t);
|
||||
rc = opal_dss.unpack(buffer, &addr.addr, &cnt, OPAL_UINT8);
|
||||
if(OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -587,7 +587,7 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
|
||||
opal_list_get_end(&mca_btl_udapl_component.udapl_procs);
|
||||
proc = (mca_btl_udapl_proc_t*)opal_list_get_next(proc)) {
|
||||
|
||||
if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &proc->proc_ompi->proc_name, endpoint)) {
|
||||
if(OPAL_EQUAL == orca_process_name_compare(ORCA_NAME_CMP_ALL, &proc->proc_ompi->proc_name, endpoint)) {
|
||||
for(i = 0; i < proc->proc_endpoint_count; i++) {
|
||||
ep = proc->proc_endpoints[i];
|
||||
|
||||
@ -613,8 +613,8 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
|
||||
|
||||
void mca_btl_udapl_endpoint_post_oob_recv(void)
|
||||
{
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, OMPI_RML_TAG_UDAPL,
|
||||
ORTE_RML_PERSISTENT, mca_btl_udapl_endpoint_recv, NULL);
|
||||
orca_oob_recv_buffer_nb(ORCA_NAME_WILDCARD, OMPI_RML_TAG_UDAPL,
|
||||
ORCA_OOB_PERSISTENT, mca_btl_udapl_endpoint_recv, NULL);
|
||||
}
|
||||
|
||||
|
||||
@ -631,7 +631,7 @@ void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint)
|
||||
/* Nasty test to prevent deadlock and unwanted connection attempts */
|
||||
/* This right here is the whole point of using the ORTE/RML handshake */
|
||||
if((MCA_BTL_UDAPL_CONN_EAGER == endpoint->endpoint_state &&
|
||||
0 > orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
0 > orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&endpoint->endpoint_proc->proc_ompi->proc_name,
|
||||
&ompi_proc_local()->proc_name)) ||
|
||||
(MCA_BTL_UDAPL_CLOSED != endpoint->endpoint_state &&
|
||||
@ -782,7 +782,7 @@ static int mca_btl_udapl_endpoint_finish_eager(
|
||||
}
|
||||
|
||||
/* Only one side does dat_ep_connect() */
|
||||
if(0 < orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
if(0 < orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&endpoint->endpoint_proc->proc_ompi->proc_name,
|
||||
&ompi_proc_local()->proc_name)) {
|
||||
|
||||
@ -970,7 +970,7 @@ static int mca_btl_udapl_endpoint_pd_finish_eager(
|
||||
* with this.
|
||||
*/
|
||||
if((BTL_UDAPL_NUM_CONNECTION != endpoint->endpoint_connections_completed)
|
||||
&& (0 < orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
&& (0 < orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&endpoint->endpoint_proc->proc_ompi->proc_name,
|
||||
&ompi_proc_local()->proc_name))) {
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
* Copyright (c) 2006 Sandia National Laboratories. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -131,7 +132,7 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
|
||||
("ompi_modex_recv failed for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
ORCA_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
OBJ_RELEASE(udapl_proc);
|
||||
return NULL;
|
||||
}
|
||||
@ -139,7 +140,7 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
|
||||
if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
|
||||
("invalid udapl address for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
ORCA_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
OBJ_RELEASE(udapl_proc);
|
||||
return NULL;
|
||||
}
|
||||
@ -251,14 +252,14 @@ static int mca_btl_udapl_proc_address_match(
|
||||
/* current uDAPL BTL only supports IPv4 */
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt", "IPv4 only",
|
||||
true, orte_process_info.nodename));
|
||||
true, orca_process_info_get_nodename()));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX == *peer_addr_idx) {
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt", "no network match",
|
||||
true, btl_addr_string, orte_process_info.nodename,
|
||||
true, btl_addr_string, orca_process_info_get_nodename(),
|
||||
peer_proc->proc_ompi->proc_hostname));
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -172,8 +173,8 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
|
||||
|
||||
/* set file name */
|
||||
if(asprintf(&vader_ctl_file, "%s"OPAL_PATH_SEP"vader_btl_module.%s",
|
||||
orte_process_info.job_session_dir,
|
||||
orte_process_info.nodename) < 0)
|
||||
orca_process_info_get_job_session_dir(),
|
||||
orca_process_info_get_nodename()) < 0)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
/* Pass in a data segment alignment of 0 to get no data
|
||||
|
@ -15,6 +15,7 @@
|
||||
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,9 +26,8 @@
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
@ -229,7 +229,7 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
|
||||
|
||||
/* if no session directory was created, then we cannot be used */
|
||||
/* XXX LANL FIXME -- this is not the case. we can use an anonymous segment */
|
||||
if (!orte_create_session_dirs) {
|
||||
if (!orca_process_info_create_session_dirs()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2008-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,8 +28,8 @@
|
||||
#include "ompi_config.h"
|
||||
#include <string.h>
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/arch.h"
|
||||
@ -45,7 +46,7 @@
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/grdma/mpool_grdma.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
@ -114,14 +115,14 @@ void mca_btl_wv_show_init_error(const char *file, int line,
|
||||
{
|
||||
if (ENOMEM == errno) {char *str_limit = NULL;
|
||||
|
||||
orte_show_help("help-mpi-btl-wv.txt", "init-fail-no-mem",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-wv.txt", "init-fail-no-mem",
|
||||
true, orca_process_info_get_nodename(),
|
||||
file, line, func, dev, str_limit);
|
||||
|
||||
if (NULL != str_limit) free(str_limit);
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "init-fail-create-q",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-wv.txt", "init-fail-create-q",
|
||||
true, orca_process_info_get_nodename(),
|
||||
file, line, func, strerror(errno), errno, dev);
|
||||
}
|
||||
}
|
||||
@ -287,9 +288,9 @@ static int mca_btl_wv_tune_endpoint(mca_btl_wv_module_t* wv_btl,
|
||||
ompi_btl_wv_ini_values_t values;
|
||||
|
||||
if(mca_btl_wv_get_transport_type(wv_btl) != endpoint->rem_info.rem_transport_type) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"conflicting transport types", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
wv_btl->device->ib_dev->name,
|
||||
(wv_btl->device->ib_dev_attr).VendorId,
|
||||
(wv_btl->device->ib_dev_attr).VendorPartId,
|
||||
@ -308,9 +309,9 @@ static int mca_btl_wv_tune_endpoint(mca_btl_wv_module_t* wv_btl,
|
||||
|
||||
if (OMPI_SUCCESS != ret &&
|
||||
OMPI_ERR_NOT_FOUND != ret) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"error in device init", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
wv_btl->device->ib_dev->name);
|
||||
return ret;
|
||||
}
|
||||
@ -349,9 +350,9 @@ static int mca_btl_wv_tune_endpoint(mca_btl_wv_module_t* wv_btl,
|
||||
|
||||
if(0 != strcmp(mca_btl_wv_component.receive_queues,
|
||||
recv_qps)) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"unsupported queues configuration", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
wv_btl->device->ib_dev->name,
|
||||
(wv_btl->device->ib_dev_attr).VendorId,
|
||||
(wv_btl->device->ib_dev_attr).VendorPartId,
|
||||
@ -371,9 +372,9 @@ static int mca_btl_wv_tune_endpoint(mca_btl_wv_module_t* wv_btl,
|
||||
if(NULL != values.receive_queues) {
|
||||
if(0 != strcmp(mca_btl_wv_component.receive_queues,
|
||||
values.receive_queues)) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"unsupported queues configuration", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
wv_btl->device->ib_dev->name,
|
||||
(wv_btl->device->ib_dev_attr).VendorId,
|
||||
(wv_btl->device->ib_dev_attr).VendorPartId,
|
||||
@ -430,8 +431,8 @@ int mca_btl_wv_add_procs(struct mca_btl_base_module_t* btl,
|
||||
|
||||
/* OOB, XOOB, RDMACM, IBCM does not support SELF comunication, so
|
||||
* mark the prco as unreachable by wv btl */
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields
|
||||
(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, &ompi_proc->proc_name)) {
|
||||
if (OPAL_EQUAL == orca_process_name_compare
|
||||
(ORCA_NAME_CMP_ALL, ORCA_PROC_MY_NAME, &ompi_proc->proc_name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -45,9 +46,7 @@
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "opal_stdint.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
@ -516,8 +515,8 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_wv_device_t *device,
|
||||
if(mca_btl_wv_component.ib_num_btls > 0 &&
|
||||
IB_DEFAULT_GID_PREFIX == subnet_id &&
|
||||
mca_btl_wv_component.warn_default_gid_prefix) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "default subnet prefix",
|
||||
true, orte_process_info.nodename);
|
||||
orca_show_help("help-mpi-btl-wv.txt", "default subnet prefix",
|
||||
true, orca_process_info_get_nodename());
|
||||
}
|
||||
|
||||
lmc = (1 << ib_port_attr->Lmc);
|
||||
@ -1038,9 +1037,9 @@ static int setup_qps(void)
|
||||
|
||||
queues = opal_argv_split(mca_btl_wv_component.receive_queues, ':');
|
||||
if (0 == opal_argv_count(queues)) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"no qps in receive_queues", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
mca_btl_wv_component.receive_queues);
|
||||
ret = OMPI_ERROR;
|
||||
goto error;
|
||||
@ -1055,9 +1054,9 @@ static int setup_qps(void)
|
||||
} else if (0 == strncmp("S,", queues[qp], 2)) {
|
||||
num_srq_qps++;
|
||||
}else {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"invalid qp type in receive_queues", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
mca_btl_wv_component.receive_queues,
|
||||
queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
@ -1084,9 +1083,9 @@ static int setup_qps(void)
|
||||
if ('P' == params[0][0]) {
|
||||
int32_t rd_win, rd_rsv;
|
||||
if (count < 3 || count > 6) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"invalid pp qp specification", true,
|
||||
orte_process_info.nodename, queues[qp]);
|
||||
orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1109,15 +1108,15 @@ static int setup_qps(void)
|
||||
mca_btl_wv_component.qp_infos[qp].u.pp_qp.rd_win = rd_win;
|
||||
mca_btl_wv_component.qp_infos[qp].u.pp_qp.rd_rsv = rd_rsv;
|
||||
if ((rd_num - rd_low) > rd_win) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "non optimal rd_win",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "non optimal rd_win",
|
||||
true, rd_win, rd_num - rd_low);
|
||||
}
|
||||
} else {
|
||||
int32_t sd_max, rd_init, srq_limit;
|
||||
if (count < 3 || count > 7) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"invalid srq specification", true,
|
||||
orte_process_info.nodename, queues[qp]);
|
||||
orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1146,15 +1145,15 @@ static int setup_qps(void)
|
||||
}
|
||||
|
||||
if (rd_num < rd_init) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "rd_num must be >= rd_init",
|
||||
true, orte_process_info.nodename, queues[qp]);
|
||||
orca_show_help("help-mpi-btl-wv.txt", "rd_num must be >= rd_init",
|
||||
true, orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (rd_num < srq_limit) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "srq_limit must be > rd_num",
|
||||
true, orte_process_info.nodename, queues[qp]);
|
||||
orca_show_help("help-mpi-btl-wv.txt", "srq_limit must be > rd_num",
|
||||
true, orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1165,8 +1164,8 @@ static int setup_qps(void)
|
||||
}
|
||||
|
||||
if (rd_num <= rd_low) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "rd_num must be > rd_low",
|
||||
true, orte_process_info.nodename, queues[qp]);
|
||||
orca_show_help("help-mpi-btl-wv.txt", "rd_num must be > rd_low",
|
||||
true, orca_process_info_get_nodename(), queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1185,23 +1184,23 @@ static int setup_qps(void)
|
||||
mca_btl_wv_module.super.btl_eager_limit :
|
||||
mca_btl_wv_module.super.btl_max_send_size;
|
||||
if (max_qp_size < max_size_needed) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"biggest qp size is too small", true,
|
||||
orte_process_info.nodename, max_qp_size,
|
||||
orca_process_info_get_nodename(), max_qp_size,
|
||||
max_size_needed);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
} else if (max_qp_size > max_size_needed) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"biggest qp size is too big", true,
|
||||
orte_process_info.nodename, max_qp_size,
|
||||
orca_process_info_get_nodename(), max_qp_size,
|
||||
max_size_needed);
|
||||
}
|
||||
|
||||
if (mca_btl_wv_component.ib_free_list_max > 0 &&
|
||||
min_freelist_size > mca_btl_wv_component.ib_free_list_max) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "freelist too small", true,
|
||||
orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-wv.txt", "freelist too small", true,
|
||||
orca_process_info_get_nodename(),
|
||||
mca_btl_wv_component.ib_free_list_max,
|
||||
min_freelist_size);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
@ -1317,9 +1316,9 @@ static int init_one_device(opal_list_t *btl_list, struct wv_device* ib_dev)
|
||||
warning that we're using default values (unless overridden
|
||||
that we don't want to see these warnings) */
|
||||
if (mca_btl_wv_component.warn_no_device_params_found) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"no device params found", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
device->ib_dev->name,
|
||||
device->ib_dev_attr.VendorId,
|
||||
device->ib_dev_attr.VendorPartId);
|
||||
@ -1492,7 +1491,7 @@ static int init_one_device(opal_list_t *btl_list, struct wv_device* ib_dev)
|
||||
if (device->btls > 0) {
|
||||
/* if apm was enabled it should be > 1 */
|
||||
if (1 == mca_btl_wv_component.apm_ports) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"apm not enough ports", true);
|
||||
mca_btl_wv_component.apm_ports = 0;
|
||||
}
|
||||
@ -1751,10 +1750,10 @@ static int init_one_device(opal_list_t *btl_list, struct wv_device* ib_dev)
|
||||
if (NULL != values.receive_queues) {
|
||||
if (0 != strcmp(values.receive_queues,
|
||||
mca_btl_wv_component.receive_queues)) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"locally conflicting receive_queues", true,
|
||||
opal_install_dirs.pkgdatadir,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
receive_queues_device->ib_dev->name,
|
||||
receive_queues_device->ib_dev_attr.VendorId,
|
||||
receive_queues_device->ib_dev_attr.VendorPartId,
|
||||
@ -1775,10 +1774,10 @@ static int init_one_device(opal_list_t *btl_list, struct wv_device* ib_dev)
|
||||
device's INI file, we must error. */
|
||||
else if (BTL_WV_RQ_SOURCE_DEVICE_INI ==
|
||||
mca_btl_wv_component.receive_queues_source) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"locally conflicting receive_queues", true,
|
||||
opal_install_dirs.pkgdatadir,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
receive_queues_device->ib_dev->name,
|
||||
receive_queues_device->ib_dev_attr.VendorId,
|
||||
receive_queues_device->ib_dev_attr.VendorPartId,
|
||||
@ -1809,9 +1808,9 @@ error:
|
||||
}
|
||||
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"error in device init", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
device->ib_dev->name);
|
||||
}
|
||||
device->ib_dev_context->device_if->Release();
|
||||
@ -2093,7 +2092,7 @@ sort_devs_by_distance(struct wv_device **ib_devs, int count)
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
devs[i].ib_dev = ib_devs[i];
|
||||
if (orte_proc_is_bound) {
|
||||
if (orca_process_info_is_bound()) {
|
||||
/* If this process is bound to one or more PUs, we can get
|
||||
an accurate distance. */
|
||||
devs[i].distance = get_ib_dev_distance(ib_devs[i]);
|
||||
@ -2282,7 +2281,7 @@ btl_wv_component_init(int *num_btl_modules,
|
||||
list_count++;
|
||||
|
||||
if (list_count > 1) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"specified include and exclude", true,
|
||||
NULL == mca_btl_wv_component.if_include ?
|
||||
"<not specified>" : mca_btl_wv_component.if_include,
|
||||
@ -2335,8 +2334,8 @@ btl_wv_component_init(int *num_btl_modules,
|
||||
}
|
||||
free(dev_sorted);
|
||||
if (!found) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "no devices right type",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-wv.txt", "no devices right type",
|
||||
true, orca_process_info_get_nodename(),
|
||||
((BTL_WV_DT_IB == mca_btl_wv_component.device_type) ?
|
||||
"InfiniBand" :
|
||||
(BTL_WV_DT_IWARP == mca_btl_wv_component.device_type) ?
|
||||
@ -2352,16 +2351,16 @@ btl_wv_component_init(int *num_btl_modules,
|
||||
if (0 != opal_argv_count(mca_btl_wv_component.if_list) &&
|
||||
mca_btl_wv_component.warn_nonexistent_if) {
|
||||
char *str = opal_argv_join(mca_btl_wv_component.if_list, ',');
|
||||
orte_show_help("help-mpi-btl-wv.txt", "nonexistent port",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-wv.txt", "nonexistent port",
|
||||
true, orca_process_info_get_nodename(),
|
||||
((NULL != mca_btl_wv_component.if_include) ?
|
||||
"in" : "ex"), str);
|
||||
free(str);
|
||||
}
|
||||
|
||||
if(0 == mca_btl_wv_component.ib_num_btls) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
"no active ports found", true, orte_process_info.nodename);
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"no active ports found", true, orca_process_info_get_nodename());
|
||||
goto no_btls;
|
||||
}
|
||||
|
||||
@ -2450,9 +2449,9 @@ btl_wv_component_init(int *num_btl_modules,
|
||||
/* Do finial init on device */
|
||||
ret = prepare_device_for_use(device);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"error in device init", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
device->ib_dev->name);
|
||||
goto no_btls;
|
||||
}
|
||||
@ -2974,16 +2973,16 @@ error:
|
||||
(endpoint->qps[qp].qp->lcl_qp->context->device->name);
|
||||
|
||||
if (WvWcRnrRetryError == wc->Status) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
BTL_WV_QP_TYPE_PP(qp) ?
|
||||
"pp rnr retry exceeded" :
|
||||
"srq rnr retry exceeded", true,
|
||||
orte_process_info.nodename, device_name,
|
||||
orca_process_info_get_nodename(), device_name,
|
||||
peer_hostname);
|
||||
} else if (-2 == wc->Status) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"pp retry exceeded", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
device_name, peer_hostname);
|
||||
}
|
||||
}
|
||||
|
@ -17,7 +17,7 @@
|
||||
* Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -34,7 +34,7 @@
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/types.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
@ -930,9 +930,9 @@ void *mca_btl_wv_endpoint_invoke_error(void *context)
|
||||
|
||||
/* If we didn't find a BTL, then just bail :-( */
|
||||
if (NULL == btl || NULL == btl->error_cb) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"cannot raise btl error", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
__FILE__, __LINE__);
|
||||
exit(1);
|
||||
}
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2008 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -30,7 +31,8 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "btl_wv.h"
|
||||
@ -241,7 +243,7 @@ static int parse_file(char *filename)
|
||||
ini_filename = filename;
|
||||
btl_wv_ini_yyin = fopen(filename, "r");
|
||||
if (NULL == btl_wv_ini_yyin) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "ini file:file not found",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "ini file:file not found",
|
||||
true, filename);
|
||||
ret = OMPI_ERR_NOT_FOUND;
|
||||
goto cleanup;
|
||||
@ -418,7 +420,7 @@ static int parse_line(parsed_section_values_t *sv)
|
||||
/* Have no idea what this parameter is. Not an error -- just
|
||||
ignore it */
|
||||
if (!showed_unknown_field_warning) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"ini file:unknown field", true,
|
||||
ini_filename, btl_wv_ini_yynewlines,
|
||||
key_buffer);
|
||||
@ -687,7 +689,7 @@ static inline void show_help(const char *topic)
|
||||
if (0 == strcmp("\n", btl_wv_ini_yytext)) {
|
||||
btl_wv_ini_yytext = "<end of line>";
|
||||
}
|
||||
orte_show_help("help-mpi-btl-wv.txt", topic, true,
|
||||
orca_show_help("help-mpi-btl-wv.txt", topic, true,
|
||||
ini_filename, btl_wv_ini_yynewlines,
|
||||
btl_wv_ini_yytext);
|
||||
btl_wv_ini_yytext = save;
|
||||
|
@ -15,6 +15,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -30,7 +31,9 @@
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "btl_wv.h"
|
||||
#include "btl_wv_mca.h"
|
||||
#include "btl_wv_ini.h"
|
||||
@ -166,9 +169,9 @@ int btl_wv_register_mca_params(void)
|
||||
"(negative = try to enable fork support, but continue even if it is not available, 0 = do not enable fork support, positive = try to enable fork support and fail if it is not available)",
|
||||
ival2, &ival, 0));
|
||||
if (0 != ival) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"ib_fork requested but not supported", true,
|
||||
orte_process_info.nodename);
|
||||
orca_process_info_get_nodename());
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -196,9 +199,9 @@ int btl_wv_register_mca_params(void)
|
||||
} else if (0 == strcasecmp(str, "all")) {
|
||||
mca_btl_wv_component.device_type = BTL_WV_DT_ALL;
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
orca_show_help("help-mpi-btl-wv.txt",
|
||||
"ib_fork requested but not supported", true,
|
||||
orte_process_info.nodename);
|
||||
orca_process_info_get_nodename());
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
free(str);
|
||||
@ -282,7 +285,7 @@ int btl_wv_register_mca_params(void)
|
||||
CHECK(reg_int("mtu", "ib_mtu", msg, WV_MTU_1024, &ival, 0));
|
||||
free(msg);
|
||||
if (ival < WV_MTU_1024 || ival > WV_MTU_4096) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "invalid value for btl_wv_ib_mtu",
|
||||
"btl_wv_ib_mtu reset to 1024");
|
||||
mca_btl_wv_component.ib_mtu = WV_MTU_1024;
|
||||
@ -295,12 +298,12 @@ int btl_wv_register_mca_params(void)
|
||||
"(must be >= 0 and <= 31)",
|
||||
25, &ival, 0));
|
||||
if (ival > 31) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_min_rnr_timer > 31",
|
||||
"btl_wv_ib_min_rnr_timer reset to 31");
|
||||
ival = 31;
|
||||
} else if (ival < 0){
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_min_rnr_timer < 0",
|
||||
"btl_wv_ib_min_rnr_timer reset to 0");
|
||||
ival = 0;
|
||||
@ -312,12 +315,12 @@ int btl_wv_register_mca_params(void)
|
||||
"(must be >= 0 and <= 31)",
|
||||
20, &ival, 0));
|
||||
if (ival > 31) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_timeout > 31",
|
||||
"btl_wv_ib_timeout reset to 31");
|
||||
ival = 31;
|
||||
} else if (ival < 0) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_timeout < 0",
|
||||
"btl_wv_ib_timeout reset to 0");
|
||||
ival = 0;
|
||||
@ -329,12 +332,12 @@ int btl_wv_register_mca_params(void)
|
||||
"(must be >= 0 and <= 7)",
|
||||
7, &ival, 0));
|
||||
if (ival > 7) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_retry_count > 7",
|
||||
"btl_wv_ib_retry_count reset to 7");
|
||||
ival = 7;
|
||||
} else if (ival < 0) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_retry_count < 0",
|
||||
"btl_wv_ib_retry_count reset to 0");
|
||||
ival = 0;
|
||||
@ -349,12 +352,12 @@ int btl_wv_register_mca_params(void)
|
||||
"(must be >= 0 and <= 7; 7 = \"infinite\")",
|
||||
7, &ival, 0));
|
||||
if (ival > 7) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_rnr_retry > 7",
|
||||
"btl_wv_ib_rnr_retry reset to 7");
|
||||
ival = 7;
|
||||
} else if (ival < 0) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_rnr_retry < 0",
|
||||
"btl_wv_ib_rnr_retry reset to 0");
|
||||
ival = 0;
|
||||
@ -365,12 +368,12 @@ int btl_wv_register_mca_params(void)
|
||||
"(must be >= 0 and <= 15)",
|
||||
0, &ival, 0));
|
||||
if (ival > 15) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_service_level > 15",
|
||||
"btl_wv_ib_service_level reset to 15");
|
||||
ival = 15;
|
||||
} else if (ival < 0) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
orca_show_help("help-mpi-btl-wv.txt", "invalid mca param value",
|
||||
true, "btl_wv_ib_service_level < 0",
|
||||
"btl_wv_ib_service_level reset to 0");
|
||||
ival = 0;
|
||||
@ -427,8 +430,8 @@ int btl_wv_register_mca_params(void)
|
||||
"(must be > 0 and power of two)",
|
||||
64, &ival, REGINT_GE_ZERO));
|
||||
if(ival <= 1 || (ival & (ival - 1))) {
|
||||
orte_show_help("help-mpi-btl-wv.txt", "wrong buffer alignment",
|
||||
true, ival, orte_process_info.nodename, 64);
|
||||
orca_show_help("help-mpi-btl-wv.txt", "wrong buffer alignment",
|
||||
true, ival, orca_process_info_get_nodename(), 64);
|
||||
mca_btl_wv_component.buffer_alignment = 64;
|
||||
} else {
|
||||
mca_btl_wv_component.buffer_alignment = (uint32_t) ival;
|
||||
|
@ -11,6 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -66,7 +67,7 @@ struct mca_btl_wv_proc_t {
|
||||
ompi_proc_t *proc_ompi;
|
||||
|
||||
/** globally unique identifier for the process */
|
||||
orte_process_name_t proc_guid;
|
||||
orca_process_name_t proc_guid;
|
||||
|
||||
/** modex messages from this proc; one for each port in the peer */
|
||||
mca_btl_wv_proc_modex_t *proc_ports;
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2007 Mellanox Technologies, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -18,7 +18,8 @@
|
||||
#include "connect/btl_wv_connect_oob.h"
|
||||
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
@ -85,9 +86,9 @@ int ompi_btl_wv_connect_base_register(void)
|
||||
}
|
||||
}
|
||||
if (NULL == all[i]) {
|
||||
orte_show_help("help-mpi-btl-wv-cpc-base.txt",
|
||||
orca_show_help("help-mpi-btl-wv-cpc-base.txt",
|
||||
"cpc name not found", true,
|
||||
"include", orte_process_info.nodename,
|
||||
"include", orca_process_info_get_nodename(),
|
||||
"include", cpc_include, temp[j],
|
||||
all_cpc_names);
|
||||
opal_argv_free(temp);
|
||||
@ -111,9 +112,9 @@ int ompi_btl_wv_connect_base_register(void)
|
||||
}
|
||||
}
|
||||
if (NULL == all[i]) {
|
||||
orte_show_help("help-mpi-btl-wv-cpc-base.txt",
|
||||
orca_show_help("help-mpi-btl-wv-cpc-base.txt",
|
||||
"cpc name not found", true,
|
||||
"exclude", orte_process_info.nodename,
|
||||
"exclude", orca_process_info_get_nodename(),
|
||||
"exclude", cpc_exclude, temp[j],
|
||||
all_cpc_names);
|
||||
opal_argv_free(temp);
|
||||
@ -257,9 +258,9 @@ int ompi_btl_wv_connect_base_select_for_local_port(mca_btl_wv_module_t *btl)
|
||||
|
||||
/* If we got an empty array, then no CPCs were eligible. Doh! */
|
||||
if (0 == cpc_index) {
|
||||
orte_show_help("help-mpi-btl-wv-cpc-base.txt",
|
||||
orca_show_help("help-mpi-btl-wv-cpc-base.txt",
|
||||
"no cpcs for port", true,
|
||||
orte_process_info.nodename,
|
||||
orca_process_info_get_nodename(),
|
||||
btl->device->ib_dev->name,
|
||||
btl->port_num, msg);
|
||||
free(cpcs);
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,20 +25,17 @@
|
||||
#include "ompi_config.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal_stdint.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
#include "btl_wv.h"
|
||||
#include "btl_wv_endpoint.h"
|
||||
#include "btl_wv_proc.h"
|
||||
#include "connect/connect.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include <rdma/winverbs.h>
|
||||
#include <malloc.h>
|
||||
|
||||
@ -170,11 +167,11 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
|
||||
static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
uint8_t message_type);
|
||||
|
||||
static void rml_send_cb(int status, orte_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
static void rml_send_cb(int status, orca_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata);
|
||||
static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
static void rml_recv_cb(int status, orca_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata);
|
||||
static int init_ud_qp(struct wv_context *context_arg,
|
||||
struct mca_btl_wv_sa_qp_cache *cache);
|
||||
@ -242,12 +239,12 @@ static int oob_component_query(mca_btl_wv_module_t *btl,
|
||||
ensure to only post it *once*, because another btl may have
|
||||
come in before this and already posted it. */
|
||||
if (!rml_recv_posted) {
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
rc = orca_oob_recv_buffer_nb(ORCA_NAME_WILDCARD,
|
||||
OMPI_RML_TAG_OPENIB,
|
||||
ORTE_RML_PERSISTENT,
|
||||
ORCA_OOB_PERSISTENT,
|
||||
rml_recv_cb,
|
||||
NULL);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"wv BTL: oob CPC system error %d (%s)",
|
||||
rc, opal_strerror(rc));
|
||||
@ -258,7 +255,7 @@ static int oob_component_query(mca_btl_wv_module_t *btl,
|
||||
|
||||
*cpc = (ompi_btl_wv_connect_base_module_t *) malloc(sizeof(ompi_btl_wv_connect_base_module_t));
|
||||
if (NULL == *cpc) {
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
|
||||
orca_oob_recv_cancel(ORCA_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
|
||||
rml_recv_posted = false;
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"wv BTL: oob CPC system error (malloc failed)");
|
||||
@ -314,7 +311,7 @@ static int oob_module_start_connect(ompi_btl_wv_connect_base_module_t *cpc,
|
||||
static int oob_component_finalize(void)
|
||||
{
|
||||
if (rml_recv_posted) {
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
|
||||
orca_oob_recv_cancel(ORCA_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
|
||||
rml_recv_posted = false;
|
||||
}
|
||||
|
||||
@ -613,7 +610,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
int rc;
|
||||
|
||||
if (NULL == buffer) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_OUT_OF_RESOURCE);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -621,14 +618,14 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8));
|
||||
rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64));
|
||||
rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -639,13 +636,13 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
&endpoint->rem_info.rem_qps[0].rem_qp_num, 1,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
|
||||
rc = opal_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
@ -658,14 +655,14 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num,
|
||||
1, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
|
||||
rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
@ -673,30 +670,30 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
|
||||
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
|
||||
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->device->mtu, 1,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
|
||||
rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* send to remote endpoint */
|
||||
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid,
|
||||
rc = orca_oob_send_buffer_nb(&endpoint->endpoint_proc->proc_guid,
|
||||
buffer, OMPI_RML_TAG_OPENIB, 0,
|
||||
rml_send_cb, NULL);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (ORCA_SUCCESS != rc) {
|
||||
ORCA_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
BTL_VERBOSE(("Sent QP Info, LID = %d, SUBNET = %" PRIx64 "\n",
|
||||
@ -711,8 +708,8 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
|
||||
* Callback when we have finished RML sending the connect data to a
|
||||
* remote peer
|
||||
*/
|
||||
static void rml_send_cb(int status, orte_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
static void rml_send_cb(int status, orca_process_name_t* endpoint,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
@ -724,8 +721,8 @@ static void rml_send_cb(int status, orte_process_name_t* endpoint,
|
||||
* and if this endpoint is trying to connect, reply with our QP info,
|
||||
* otherwise try to modify QP's and establish reliable connection
|
||||
*/
|
||||
static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
static void rml_recv_cb(int status, orca_process_name_t* process_name,
|
||||
opal_buffer_t* buffer, orca_oob_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
mca_btl_wv_proc_t *ib_proc;
|
||||
@ -744,7 +741,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8));
|
||||
rc = opal_dss.unpack(buffer, &message_type, &cnt, OPAL_UINT8);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_wv_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -752,7 +749,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, OPAL_UINT64);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_wv_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -761,14 +758,14 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &lcl_qp, &cnt, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_wv_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
|
||||
rc = opal_dss.unpack(buffer, &lcl_lid, &cnt, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_wv_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -786,7 +783,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_wv_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -794,7 +791,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_psn, &cnt,
|
||||
OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_wv_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -803,21 +800,21 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_lid, &cnt, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_wv_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_wv_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
|
||||
rc = opal_dss.unpack(buffer, &rem_info.rem_index, &cnt, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORCA_ERROR_LOG(rc);
|
||||
mca_btl_wv_endpoint_invoke_error(NULL);
|
||||
return;
|
||||
}
|
||||
@ -827,7 +824,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
rem_info.rem_lid,
|
||||
rem_info.rem_subnet_id));
|
||||
|
||||
master = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME,
|
||||
master = orca_process_name_compare(ORCA_NAME_CMP_ALL, ORCA_PROC_MY_NAME,
|
||||
process_name) > 0 ? true : false;
|
||||
|
||||
/* Need to protect the ib_procs list */
|
||||
@ -840,7 +837,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
ib_proc = (mca_btl_wv_proc_t*)opal_list_get_next(ib_proc)) {
|
||||
bool found = false;
|
||||
|
||||
if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
if (orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&ib_proc->proc_guid, process_name) != OPAL_EQUAL) {
|
||||
continue;
|
||||
}
|
||||
@ -1021,8 +1018,8 @@ static int init_ud_qp(struct wv_context *context_arg,
|
||||
cache->cq->cqe = (uint32_t) entries;
|
||||
if (NULL == cache->cq) {
|
||||
BTL_ERROR(("error creating cq, errno says %s", strerror(errno)));
|
||||
orte_show_help("help-mpi-btl-wv.txt", "init-fail-create-q",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-btl-wv.txt", "init-fail-create-q",
|
||||
true, orca_process_info_get_nodename(),
|
||||
__FILE__, __LINE__, "create_cq",
|
||||
strerror(errno), errno,
|
||||
context_arg->device->name);
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include "mpi.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "opal/mca/mca.h"
|
||||
@ -118,7 +118,7 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
|
||||
collective modules available, then print error and return. */
|
||||
if (NULL == selectable) {
|
||||
/* There's no modules available */
|
||||
orte_show_help("help-mca-coll-base",
|
||||
orca_show_help("help-mca-coll-base",
|
||||
"comm-select:none-available", true);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,7 +28,7 @@
|
||||
#include "ompi/constants.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
@ -130,7 +131,7 @@ int mca_coll_base_find_available(bool enable_progress_threads,
|
||||
mca_coll_base_components_available_valid = false;
|
||||
opal_output_verbose(10, mca_coll_base_output,
|
||||
"coll:find_available: no coll components available!");
|
||||
orte_show_help("help-mca-base", "find-available:none-found", true,
|
||||
orca_show_help("help-mca-base", "find-available:none-found", true,
|
||||
"coll");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -26,7 +27,7 @@
|
||||
#include "mpi.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "orte/types.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/mca/common/sm/common_sm.h"
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -29,7 +30,7 @@
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "coll_sm.h"
|
||||
|
||||
|
||||
@ -191,13 +192,13 @@ static int sm_register(void)
|
||||
cs->sm_tree_degree,
|
||||
&cs->sm_tree_degree);
|
||||
if (cs->sm_tree_degree > cs->sm_control_size) {
|
||||
orte_show_help("help-mpi-coll-sm.txt",
|
||||
orca_show_help("help-mpi-coll-sm.txt",
|
||||
"tree-degree-larger-than-control", true,
|
||||
cs->sm_tree_degree, cs->sm_control_size);
|
||||
cs->sm_tree_degree = cs->sm_control_size;
|
||||
}
|
||||
if (cs->sm_tree_degree > 255) {
|
||||
orte_show_help("help-mpi-coll-sm.txt",
|
||||
orca_show_help("help-mpi-coll-sm.txt",
|
||||
"tree-degree-larger-than-255", true,
|
||||
cs->sm_tree_degree);
|
||||
cs->sm_tree_degree = 255;
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -51,8 +52,7 @@
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
#include "opal/util/os_path.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/group/group.h"
|
||||
@ -528,33 +528,33 @@ static int bootstrap_comm(ompi_communicator_t *comm,
|
||||
int num_in_use = c->sm_comm_num_in_use_flags;
|
||||
int frag_size = c->sm_fragment_size;
|
||||
int control_size = c->sm_control_size;
|
||||
orte_process_name_t *lowest_name = NULL;
|
||||
orca_process_name_t *lowest_name = NULL;
|
||||
size_t size;
|
||||
ompi_proc_t *proc;
|
||||
|
||||
/* Make the rendezvous filename for this communicators shmem data
|
||||
segment. The CID is not guaranteed to be unique among all
|
||||
procs on this node, so also pair it with the PID of the proc
|
||||
with the lowest ORTE name to form a unique filename. */
|
||||
with the lowest RTE name to form a unique filename. */
|
||||
proc = ompi_group_peer_lookup(comm->c_local_group, 0);
|
||||
lowest_name = &(proc->proc_name);
|
||||
for (i = 1; i < comm_size; ++i) {
|
||||
proc = ompi_group_peer_lookup(comm->c_local_group, i);
|
||||
if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
if (orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&(proc->proc_name),
|
||||
lowest_name) < 0) {
|
||||
lowest_name = &(proc->proc_name);
|
||||
}
|
||||
}
|
||||
asprintf(&shortpath, "coll-sm-cid-%d-name-%s.mmap", comm->c_contextid,
|
||||
ORTE_NAME_PRINT(lowest_name));
|
||||
ORCA_NAME_PRINT(lowest_name));
|
||||
if (NULL == shortpath) {
|
||||
opal_output_verbose(10, mca_coll_base_output,
|
||||
"coll:sm:enable:bootstrap comm (%d/%s): asprintf failed",
|
||||
comm->c_contextid, comm->c_name);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
fullpath = opal_os_path(false, orte_process_info.job_session_dir,
|
||||
fullpath = opal_os_path(false, orca_process_info_get_job_session_dir(),
|
||||
shortpath, NULL);
|
||||
free(shortpath);
|
||||
if (NULL == fullpath) {
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -35,8 +36,7 @@
|
||||
#include "opal/datatype/opal_datatype_cuda.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "common_cuda.h"
|
||||
|
||||
static bool common_cuda_initialized = false;
|
||||
@ -175,10 +175,10 @@ static int mca_common_cuda_init(void)
|
||||
/* Check for the not initialized error since we can make suggestions to
|
||||
* user for this error. */
|
||||
if (CUDA_ERROR_NOT_INITIALIZED == res) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent failed not initialized",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent failed not initialized",
|
||||
true);
|
||||
} else {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent failed",
|
||||
true, res);
|
||||
}
|
||||
}
|
||||
@ -186,7 +186,7 @@ static int mca_common_cuda_init(void)
|
||||
mca_common_cuda_register_memory = false;
|
||||
} else if ((CUDA_SUCCESS == res) && (NULL == cuContext)) {
|
||||
if (mca_common_cuda_warning) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent returned NULL",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent returned NULL",
|
||||
true);
|
||||
}
|
||||
mca_common_cuda_enabled = false;
|
||||
@ -211,7 +211,7 @@ static int mca_common_cuda_init(void)
|
||||
|
||||
cuda_event_ipc_array = (CUevent *) malloc(sizeof(CUevent) * cuda_event_max);
|
||||
if (NULL == cuda_event_ipc_array) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
true, errno, strerror(errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -220,7 +220,7 @@ static int mca_common_cuda_init(void)
|
||||
for (i = 0; i < cuda_event_max; i++) {
|
||||
res = cuEventCreate(&cuda_event_ipc_array[i], CU_EVENT_DISABLE_TIMING);
|
||||
if (CUDA_SUCCESS != res) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
|
||||
true, res);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -231,7 +231,7 @@ static int mca_common_cuda_init(void)
|
||||
cuda_event_ipc_frag_array = (struct mca_btl_base_descriptor_t **)
|
||||
malloc(sizeof(struct mca_btl_base_descriptor_t *) * cuda_event_max);
|
||||
if (NULL == cuda_event_ipc_frag_array) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
true, errno, strerror(errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -249,7 +249,7 @@ static int mca_common_cuda_init(void)
|
||||
|
||||
cuda_event_dtoh_array = (CUevent *) malloc(sizeof(CUevent) * cuda_event_max);
|
||||
if (NULL == cuda_event_dtoh_array) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
true, errno, strerror(errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -258,7 +258,7 @@ static int mca_common_cuda_init(void)
|
||||
for (i = 0; i < cuda_event_max; i++) {
|
||||
res = cuEventCreate(&cuda_event_dtoh_array[i], CU_EVENT_DISABLE_TIMING);
|
||||
if (CUDA_SUCCESS != res) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
|
||||
true, res);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -269,7 +269,7 @@ static int mca_common_cuda_init(void)
|
||||
cuda_event_dtoh_frag_array = (struct mca_btl_base_descriptor_t **)
|
||||
malloc(sizeof(struct mca_btl_base_descriptor_t *) * cuda_event_max);
|
||||
if (NULL == cuda_event_dtoh_frag_array) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
true, errno, strerror(errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -284,7 +284,7 @@ static int mca_common_cuda_init(void)
|
||||
|
||||
cuda_event_htod_array = (CUevent *) malloc(sizeof(CUevent) * cuda_event_max);
|
||||
if (NULL == cuda_event_htod_array) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
true, errno, strerror(errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -293,7 +293,7 @@ static int mca_common_cuda_init(void)
|
||||
for (i = 0; i < cuda_event_max; i++) {
|
||||
res = cuEventCreate(&cuda_event_htod_array[i], CU_EVENT_DISABLE_TIMING);
|
||||
if (CUDA_SUCCESS != res) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
|
||||
true, res);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -304,7 +304,7 @@ static int mca_common_cuda_init(void)
|
||||
cuda_event_htod_frag_array = (struct mca_btl_base_descriptor_t **)
|
||||
malloc(sizeof(struct mca_btl_base_descriptor_t *) * cuda_event_max);
|
||||
if (NULL == cuda_event_htod_frag_array) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||
true, errno, strerror(errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -319,9 +319,9 @@ static int mca_common_cuda_init(void)
|
||||
if (res != CUDA_SUCCESS) {
|
||||
/* If registering the memory fails, print a message and continue.
|
||||
* This is not a fatal error. */
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuMemHostRegister failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuMemHostRegister failed",
|
||||
true, mem_reg->ptr, mem_reg->amount,
|
||||
orte_process_info.nodename, res, mem_reg->msg);
|
||||
orca_process_info_get_nodename(), res, mem_reg->msg);
|
||||
} else {
|
||||
opal_output_verbose(20, mca_common_cuda_output,
|
||||
"CUDA: cuMemHostRegister OK on mpool %s: "
|
||||
@ -336,7 +336,7 @@ static int mca_common_cuda_init(void)
|
||||
/* Create stream for use in ipc asynchronous copies */
|
||||
res = cuStreamCreate(&ipcStream, 0);
|
||||
if (res != CUDA_SUCCESS) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
|
||||
true, res);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -344,7 +344,7 @@ static int mca_common_cuda_init(void)
|
||||
/* Create stream for use in dtoh asynchronous copies */
|
||||
res = cuStreamCreate(&dtohStream, 0);
|
||||
if (res != CUDA_SUCCESS) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
|
||||
true, res);
|
||||
return OMPI_ERROR;
|
||||
|
||||
@ -353,7 +353,7 @@ static int mca_common_cuda_init(void)
|
||||
/* Create stream for use in htod asynchronous copies */
|
||||
res = cuStreamCreate(&htodStream, 0);
|
||||
if (res != CUDA_SUCCESS) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
|
||||
true, res);
|
||||
return OMPI_ERROR;
|
||||
|
||||
@ -393,9 +393,9 @@ void mca_common_cuda_register(void *ptr, size_t amount, char *msg) {
|
||||
if (res != CUDA_SUCCESS) {
|
||||
/* If registering the memory fails, print a message and continue.
|
||||
* This is not a fatal error. */
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuMemHostRegister failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuMemHostRegister failed",
|
||||
true, ptr, amount,
|
||||
orte_process_info.nodename, res, msg);
|
||||
orca_process_info_get_nodename(), res, msg);
|
||||
} else {
|
||||
opal_output_verbose(20, mca_common_cuda_output,
|
||||
"CUDA: cuMemHostRegister OK on mpool %s: "
|
||||
@ -432,9 +432,9 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
|
||||
if (res != CUDA_SUCCESS) {
|
||||
/* If unregistering the memory fails, print a message and continue.
|
||||
* This is not a fatal error. */
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuMemHostUnregister failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuMemHostUnregister failed",
|
||||
true, ptr,
|
||||
orte_process_info.nodename, res, msg);
|
||||
orca_process_info_get_nodename(), res, msg);
|
||||
} else {
|
||||
opal_output_verbose(20, mca_common_cuda_output,
|
||||
"CUDA: cuMemHostUnregister OK on mpool %s: "
|
||||
@ -473,7 +473,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
|
||||
CUDA_DUMP_MEMHANDLE((100, &memHandle, "GetMemHandle-After"));
|
||||
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuIpcGetMemHandle failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuIpcGetMemHandle failed",
|
||||
true, result, base);
|
||||
return OMPI_ERROR;
|
||||
} else {
|
||||
@ -486,7 +486,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
|
||||
* how the remote side saves the handles in a cache. */
|
||||
result = cuMemGetAddressRange(&pbase, &psize, (CUdeviceptr)base);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuMemGetAddressRange failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuMemGetAddressRange failed",
|
||||
true, result, base);
|
||||
return OMPI_ERROR;
|
||||
} else {
|
||||
@ -509,7 +509,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
|
||||
* with. */
|
||||
result = cuEventRecord((CUevent)cuda_reg->event, 0);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
true, result, base);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -561,7 +561,7 @@ int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *n
|
||||
return OMPI_ERR_WOULD_BLOCK;
|
||||
}
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuIpcOpenMemHandle failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuIpcOpenMemHandle failed",
|
||||
true, result, base);
|
||||
/* Currently, this is a non-recoverable error */
|
||||
return OMPI_ERROR;
|
||||
@ -585,7 +585,7 @@ int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg)
|
||||
|
||||
result = cuIpcCloseMemHandle((CUdeviceptr)cuda_reg->base.alloc_base);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuIpcCloseMemHandle failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuIpcCloseMemHandle failed",
|
||||
true, result, cuda_reg->base.alloc_base);
|
||||
/* We will just continue on and hope things continue to work. */
|
||||
} else {
|
||||
@ -604,13 +604,13 @@ void mca_common_cuda_construct_event_and_handle(uint64_t **event, void **handle)
|
||||
|
||||
result = cuEventCreate((CUevent *)event, CU_EVENT_INTERPROCESS | CU_EVENT_DISABLE_TIMING);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
|
||||
true, result);
|
||||
}
|
||||
|
||||
result = cuIpcGetEventHandle((CUipcEventHandle *)handle, (CUevent)*event);
|
||||
if (CUDA_SUCCESS != result){
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuIpcGetEventHandle failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuIpcGetEventHandle failed",
|
||||
true, result);
|
||||
}
|
||||
|
||||
@ -624,7 +624,7 @@ void mca_common_cuda_destruct_event(uint64_t *event)
|
||||
|
||||
result = cuEventDestroy((CUevent)event);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventDestroy failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventDestroy failed",
|
||||
true, result);
|
||||
}
|
||||
}
|
||||
@ -645,7 +645,7 @@ void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
|
||||
|
||||
result = cuIpcOpenEventHandle(&event, evtHandle);
|
||||
if (CUDA_SUCCESS != result){
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuIpcOpenEventHandle failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuIpcOpenEventHandle failed",
|
||||
true, result);
|
||||
}
|
||||
|
||||
@ -656,21 +656,21 @@ void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
|
||||
*/
|
||||
result = cuEventRecord(event, 0);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
true, result);
|
||||
}
|
||||
/* END of Workaround */
|
||||
|
||||
result = cuStreamWaitEvent(0, event, 0);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuStreamWaitEvent failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuStreamWaitEvent failed",
|
||||
true, result);
|
||||
}
|
||||
|
||||
/* All done with this event. */
|
||||
result = cuEventDestroy(event);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventDestroy failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventDestroy failed",
|
||||
true, result);
|
||||
}
|
||||
}
|
||||
@ -689,7 +689,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
|
||||
* return an error. The error message will tell the user to try and
|
||||
* run again, but with a larger array for storing events. */
|
||||
if (cuda_event_ipc_num_used == cuda_event_max) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "Out of cuEvent handles",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "Out of cuEvent handles",
|
||||
true, cuda_event_max, cuda_event_max+100, cuda_event_max+100);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
@ -699,7 +699,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
|
||||
if (OPAL_LIKELY(mca_common_cuda_async)) {
|
||||
result = cuMemcpyAsync((CUdeviceptr)dst, (CUdeviceptr)src, amount, ipcStream);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuMemcpyAsync failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuMemcpyAsync failed",
|
||||
true, dst, src, amount, result);
|
||||
return OMPI_ERROR;
|
||||
} else {
|
||||
@ -709,7 +709,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
|
||||
}
|
||||
result = cuEventRecord(cuda_event_ipc_array[cuda_event_ipc_first_avail], ipcStream);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
true, result);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -727,7 +727,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
|
||||
/* Mimic the async function so they use the same memcpy call. */
|
||||
result = cuMemcpyAsync((CUdeviceptr)dst, (CUdeviceptr)src, amount, ipcStream);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuMemcpyAsync failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuMemcpyAsync failed",
|
||||
true, dst, src, amount, result);
|
||||
return OMPI_ERROR;
|
||||
} else {
|
||||
@ -739,7 +739,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
|
||||
/* Record an event, then wait for it to complete with calls to cuEventQuery */
|
||||
result = cuEventRecord(cuda_event_ipc_array[cuda_event_ipc_first_avail], ipcStream);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
true, result);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -755,7 +755,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
|
||||
|
||||
result = cuEventQuery(cuda_event_ipc_array[cuda_event_ipc_first_used]);
|
||||
if ((CUDA_SUCCESS != result) && (CUDA_ERROR_NOT_READY != result)) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
true, result);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -767,7 +767,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
|
||||
}
|
||||
result = cuEventQuery(cuda_event_ipc_array[cuda_event_ipc_first_used]);
|
||||
if ((CUDA_SUCCESS != result) && (CUDA_ERROR_NOT_READY != result)) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
true, result);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -796,14 +796,14 @@ int mca_common_cuda_record_dtoh_event(char *msg, struct mca_btl_base_descriptor_
|
||||
* return an error. The error message will tell the user to try and
|
||||
* run again, but with a larger array for storing events. */
|
||||
if (cuda_event_dtoh_num_used == cuda_event_max) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "Out of cuEvent handles",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "Out of cuEvent handles",
|
||||
true, cuda_event_max, cuda_event_max+100, cuda_event_max+100);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
result = cuEventRecord(cuda_event_dtoh_array[cuda_event_dtoh_first_avail], dtohStream);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
true, result);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -831,14 +831,14 @@ int mca_common_cuda_record_htod_event(char *msg, struct mca_btl_base_descriptor_
|
||||
* return an error. The error message will tell the user to try and
|
||||
* run again, but with a larger array for storing events. */
|
||||
if (cuda_event_htod_num_used == cuda_event_max) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "Out of cuEvent handles",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "Out of cuEvent handles",
|
||||
true, cuda_event_max, cuda_event_max+100, cuda_event_max+100);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
result = cuEventRecord(cuda_event_htod_array[cuda_event_htod_first_avail], htodStream);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventRecord failed",
|
||||
true, result);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -890,7 +890,7 @@ int progress_one_cuda_ipc_event(struct mca_btl_base_descriptor_t **frag) {
|
||||
*frag = NULL;
|
||||
return 0;
|
||||
} else if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
true, result);
|
||||
*frag = NULL;
|
||||
return OMPI_ERROR;
|
||||
@ -932,7 +932,7 @@ int progress_one_cuda_dtoh_event(struct mca_btl_base_descriptor_t **frag) {
|
||||
*frag = NULL;
|
||||
return 0;
|
||||
} else if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
true, result);
|
||||
*frag = NULL;
|
||||
return OMPI_ERROR;
|
||||
@ -974,7 +974,7 @@ int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **frag) {
|
||||
*frag = NULL;
|
||||
return 0;
|
||||
} else if (CUDA_SUCCESS != result) {
|
||||
orte_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
orca_show_help("help-mpi-common-cuda.txt", "cuEventQuery failed",
|
||||
true, result);
|
||||
*frag = NULL;
|
||||
return OMPI_ERROR;
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -46,11 +47,7 @@
|
||||
#include "opal/runtime/opal_cr.h"
|
||||
#endif
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
@ -91,7 +88,7 @@ attach_and_init(size_t size_ctl_structure,
|
||||
|
||||
/* set up the map object */
|
||||
if (NULL == (map = OBJ_NEW(mca_common_sm_module_t))) {
|
||||
ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
ORCA_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -114,8 +111,8 @@ attach_and_init(size_t size_ctl_structure,
|
||||
addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *);
|
||||
/* is addr past end of the shared memory segment? */
|
||||
if ((unsigned char *)seg + shmem_ds.seg_size < addr) {
|
||||
orte_show_help("help-mpi-common-sm.txt", "mmap too small", 1,
|
||||
orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-common-sm.txt", "mmap too small", 1,
|
||||
orca_process_info_get_nodename(),
|
||||
(unsigned long)shmem_ds.seg_size,
|
||||
(unsigned long)size_ctl_structure,
|
||||
(unsigned long)data_seg_alignment);
|
||||
@ -176,8 +173,8 @@ mca_common_sm_init(ompi_proc_t **procs,
|
||||
/* if we have a new lowest, swap it with position 0
|
||||
* so that procs[0] is always the lowest named proc
|
||||
*/
|
||||
if (OPAL_VALUE2_GREATER == orte_util_compare_name_fields(
|
||||
ORTE_NS_CMP_ALL,
|
||||
if (OPAL_VALUE2_GREATER == orca_process_name_compare(
|
||||
ORCA_NAME_CMP_ALL,
|
||||
&(procs[p]->proc_name),
|
||||
&(procs[0]->proc_name))) {
|
||||
temp_proc = procs[0];
|
||||
@ -198,9 +195,9 @@ mca_common_sm_init(ompi_proc_t **procs,
|
||||
}
|
||||
|
||||
/* determine whether or not i am the lowest local process */
|
||||
lowest_local_proc = (0 == orte_util_compare_name_fields(
|
||||
ORTE_NS_CMP_ALL,
|
||||
ORTE_PROC_MY_NAME,
|
||||
lowest_local_proc = (0 == orca_process_name_compare(
|
||||
ORCA_NAME_CMP_ALL,
|
||||
ORCA_PROC_MY_NAME,
|
||||
&(procs[0]->proc_name)));
|
||||
|
||||
/* figure out if i am the lowest rank in the group.
|
||||
@ -289,7 +286,7 @@ mca_common_sm_init_group(ompi_group_t *group,
|
||||
}
|
||||
else if (NULL == (procs = (ompi_proc_t **)
|
||||
malloc(sizeof(ompi_proc_t *) * group_size))) {
|
||||
ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
ORCA_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
goto out;
|
||||
}
|
||||
/* make sure that all the procs in the group are local */
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,12 +26,7 @@
|
||||
#include "opal/types.h"
|
||||
#include "opal/dss/dss.h"
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
@ -73,7 +69,7 @@ mca_common_sm_rml_info_bcast(opal_shmem_ds_t *out_ds_buf,
|
||||
* note in common_sm.c for more details. */
|
||||
tmprc = opal_dss.pack(buffer, &msg_id_str, 1, OPAL_STRING);
|
||||
if (OPAL_SUCCESS != tmprc) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_PACK_FAILURE);
|
||||
rc = OMPI_ERR_PACK_FAILURE;
|
||||
goto out;
|
||||
}
|
||||
@ -81,7 +77,7 @@ mca_common_sm_rml_info_bcast(opal_shmem_ds_t *out_ds_buf,
|
||||
(int32_t)sizeof(opal_shmem_ds_t),
|
||||
OPAL_BYTE);
|
||||
if (OPAL_SUCCESS != tmprc) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_PACK_FAILURE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_PACK_FAILURE);
|
||||
rc = OMPI_ERR_PACK_FAILURE;
|
||||
goto out;
|
||||
}
|
||||
@ -89,10 +85,10 @@ mca_common_sm_rml_info_bcast(opal_shmem_ds_t *out_ds_buf,
|
||||
/* first num_local_procs items should be local procs */
|
||||
for (p = 1; p < num_local_procs; ++p) {
|
||||
/* a potential future optimization: use non-blocking routines */
|
||||
tmprc = orte_rml.send_buffer(&(procs[p]->proc_name), buffer, tag,
|
||||
tmprc = orca_oob_send_buffer(&(procs[p]->proc_name), buffer, tag,
|
||||
0);
|
||||
if (0 > tmprc) {
|
||||
ORTE_ERROR_LOG(tmprc);
|
||||
ORCA_ERROR_LOG(tmprc);
|
||||
opal_progress_event_users_decrement();
|
||||
rc = OMPI_ERROR;
|
||||
goto out;
|
||||
@ -106,10 +102,10 @@ mca_common_sm_rml_info_bcast(opal_shmem_ds_t *out_ds_buf,
|
||||
/* bump up the libevent polling frequency while we're in this RML recv,
|
||||
* just to ensure we're checking libevent frequently. */
|
||||
opal_progress_event_users_increment();
|
||||
tmprc = orte_rml.recv_buffer(&(procs[0]->proc_name), buffer, tag, 0);
|
||||
tmprc = orca_oob_recv_buffer(&(procs[0]->proc_name), buffer, tag, 0);
|
||||
opal_progress_event_users_decrement();
|
||||
if (0 > tmprc) {
|
||||
ORTE_ERROR_LOG(tmprc);
|
||||
ORCA_ERROR_LOG(tmprc);
|
||||
rc = OMPI_ERROR;
|
||||
goto out;
|
||||
}
|
||||
@ -118,14 +114,14 @@ mca_common_sm_rml_info_bcast(opal_shmem_ds_t *out_ds_buf,
|
||||
tmprc = opal_dss.unpack(buffer, &msg_id_str_to_tx, &num_vals,
|
||||
OPAL_STRING);
|
||||
if (0 > tmprc) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_UNPACK_FAILURE);
|
||||
rc = OMPI_ERROR;
|
||||
goto out;
|
||||
}
|
||||
num_vals = (int32_t)sizeof(opal_shmem_ds_t);
|
||||
tmprc = opal_dss.unpack(buffer, out_ds_buf, &num_vals, OPAL_BYTE);
|
||||
if (0 > tmprc) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_UNPACK_FAILURE);
|
||||
ORCA_ERROR_LOG(ORCA_ERR_UNPACK_FAILURE);
|
||||
rc = OMPI_ERROR;
|
||||
goto out;
|
||||
}
|
||||
@ -134,8 +130,8 @@ mca_common_sm_rml_info_bcast(opal_shmem_ds_t *out_ds_buf,
|
||||
* outside of our current scope of assumptions. see "RML Messaging and
|
||||
* Our Assumptions" note in common_sm.c */
|
||||
if (0 != strcmp(msg_id_str_to_tx, msg_id_str)) {
|
||||
orte_show_help("help-mpi-common-sm.txt", "unexpected message id",
|
||||
true, orte_process_info.nodename,
|
||||
orca_show_help("help-mpi-common-sm.txt", "unexpected message id",
|
||||
true, orca_process_info_get_nodename(),
|
||||
msg_id_str, msg_id_str_to_tx);
|
||||
rc = OMPI_ERROR;
|
||||
/* here for extra debug info only */
|
||||
|
@ -5,6 +5,7 @@
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -31,11 +32,7 @@
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
@ -302,9 +299,9 @@ static int drain_message_copy_remove_persistent(ompi_crcp_bkmrk_pml_drain_messag
|
||||
ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref);
|
||||
|
||||
/*
|
||||
* Peer List: Find the peer reference matching the ORTE process name
|
||||
* Peer List: Find the peer reference matching the Open RTE/ORCA process name
|
||||
*/
|
||||
static ompi_crcp_bkmrk_pml_peer_ref_t* find_peer(orte_process_name_t proc);
|
||||
static ompi_crcp_bkmrk_pml_peer_ref_t* find_peer(orca_process_name_t proc);
|
||||
|
||||
/*
|
||||
* Peer List: Find the peer reference matching the index into the communicator
|
||||
@ -346,9 +343,9 @@ static int recv_bookmarks(int peer_idx);
|
||||
* Callback to receive the bookmarks from a peer
|
||||
*/
|
||||
static void recv_bookmarks_cbfunc(int status,
|
||||
orte_process_name_t* sender,
|
||||
orca_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orca_oob_tag_t tag,
|
||||
void* cbdata);
|
||||
static int total_recv_bookmarks = 0;
|
||||
|
||||
@ -419,9 +416,9 @@ static int ft_event_post_drain_acks(void);
|
||||
* Callback to service drain message acks.
|
||||
*/
|
||||
static void drain_message_ack_cbfunc(int status,
|
||||
orte_process_name_t* sender,
|
||||
orca_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orca_oob_tag_t tag,
|
||||
void* cbdata);
|
||||
|
||||
/*
|
||||
@ -700,8 +697,8 @@ OBJ_CLASS_INSTANCE(ompi_crcp_bkmrk_pml_peer_ref_t,
|
||||
ompi_crcp_bkmrk_pml_peer_ref_destruct);
|
||||
|
||||
void ompi_crcp_bkmrk_pml_peer_ref_construct(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref) {
|
||||
peer_ref->proc_name.jobid = ORTE_JOBID_INVALID;
|
||||
peer_ref->proc_name.vpid = ORTE_VPID_INVALID;
|
||||
peer_ref->proc_name.jobid = ORCA_JOBID_INVALID;
|
||||
peer_ref->proc_name.vpid = ORCA_VPID_INVALID;
|
||||
|
||||
OBJ_CONSTRUCT(&peer_ref->send_list, opal_list_t);
|
||||
OBJ_CONSTRUCT(&peer_ref->isend_list, opal_list_t);
|
||||
@ -727,8 +724,8 @@ void ompi_crcp_bkmrk_pml_peer_ref_construct(ompi_crcp_bkmrk_pml_peer_ref_t *peer
|
||||
void ompi_crcp_bkmrk_pml_peer_ref_destruct( ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref) {
|
||||
opal_list_item_t* item = NULL;
|
||||
|
||||
peer_ref->proc_name.jobid = ORTE_JOBID_INVALID;
|
||||
peer_ref->proc_name.vpid = ORTE_VPID_INVALID;
|
||||
peer_ref->proc_name.jobid = ORCA_JOBID_INVALID;
|
||||
peer_ref->proc_name.vpid = ORCA_VPID_INVALID;
|
||||
|
||||
while( NULL != (item = opal_list_remove_first(&peer_ref->send_list)) ) {
|
||||
HOKE_TRAFFIC_MSG_REF_RETURN(item);
|
||||
@ -836,8 +833,8 @@ void ompi_crcp_bkmrk_pml_traffic_message_ref_construct(ompi_crcp_bkmrk_pml_traff
|
||||
|
||||
OBJ_CONSTRUCT(&msg_ref->msg_contents, opal_list_t);
|
||||
|
||||
msg_ref->proc_name.jobid = ORTE_JOBID_INVALID;
|
||||
msg_ref->proc_name.vpid = ORTE_VPID_INVALID;
|
||||
msg_ref->proc_name.jobid = ORCA_JOBID_INVALID;
|
||||
msg_ref->proc_name.vpid = ORCA_VPID_INVALID;
|
||||
|
||||
msg_ref->matched = INVALID_INT;
|
||||
msg_ref->done = INVALID_INT;
|
||||
@ -863,8 +860,8 @@ void ompi_crcp_bkmrk_pml_traffic_message_ref_destruct( ompi_crcp_bkmrk_pml_traff
|
||||
}
|
||||
OBJ_DESTRUCT(&(msg_ref->msg_contents));
|
||||
|
||||
msg_ref->proc_name.jobid = ORTE_JOBID_INVALID;
|
||||
msg_ref->proc_name.vpid = ORTE_VPID_INVALID;
|
||||
msg_ref->proc_name.jobid = ORCA_JOBID_INVALID;
|
||||
msg_ref->proc_name.vpid = ORCA_VPID_INVALID;
|
||||
|
||||
msg_ref->matched = INVALID_INT;
|
||||
msg_ref->done = INVALID_INT;
|
||||
@ -896,8 +893,8 @@ void ompi_crcp_bkmrk_pml_drain_message_ref_construct(ompi_crcp_bkmrk_pml_drain_m
|
||||
|
||||
OBJ_CONSTRUCT(&msg_ref->msg_contents, opal_list_t);
|
||||
|
||||
msg_ref->proc_name.jobid = ORTE_JOBID_INVALID;
|
||||
msg_ref->proc_name.vpid = ORTE_VPID_INVALID;
|
||||
msg_ref->proc_name.jobid = ORCA_JOBID_INVALID;
|
||||
msg_ref->proc_name.vpid = ORCA_VPID_INVALID;
|
||||
|
||||
msg_ref->done = INVALID_INT;
|
||||
msg_ref->active = INVALID_INT;
|
||||
@ -927,8 +924,8 @@ void ompi_crcp_bkmrk_pml_drain_message_ref_destruct( ompi_crcp_bkmrk_pml_drain_m
|
||||
}
|
||||
OBJ_DESTRUCT(&(msg_ref->msg_contents));
|
||||
|
||||
msg_ref->proc_name.jobid = ORTE_JOBID_INVALID;
|
||||
msg_ref->proc_name.vpid = ORTE_VPID_INVALID;
|
||||
msg_ref->proc_name.jobid = ORCA_JOBID_INVALID;
|
||||
msg_ref->proc_name.vpid = ORCA_VPID_INVALID;
|
||||
|
||||
msg_ref->done = INVALID_INT;
|
||||
msg_ref->active = INVALID_INT;
|
||||
@ -946,15 +943,15 @@ OBJ_CLASS_INSTANCE(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t,
|
||||
void ompi_crcp_bkmrk_pml_drain_message_ack_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref) {
|
||||
msg_ack_ref->complete = false;
|
||||
|
||||
msg_ack_ref->peer.jobid = ORTE_JOBID_INVALID;
|
||||
msg_ack_ref->peer.vpid = ORTE_VPID_INVALID;
|
||||
msg_ack_ref->peer.jobid = ORCA_JOBID_INVALID;
|
||||
msg_ack_ref->peer.vpid = ORCA_VPID_INVALID;
|
||||
}
|
||||
|
||||
void ompi_crcp_bkmrk_pml_drain_message_ack_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref) {
|
||||
msg_ack_ref->complete = false;
|
||||
|
||||
msg_ack_ref->peer.jobid = ORTE_JOBID_INVALID;
|
||||
msg_ack_ref->peer.vpid = ORTE_VPID_INVALID;
|
||||
msg_ack_ref->peer.jobid = ORCA_JOBID_INVALID;
|
||||
msg_ack_ref->peer.vpid = ORCA_VPID_INVALID;
|
||||
}
|
||||
|
||||
|
||||
@ -1069,7 +1066,7 @@ do { \
|
||||
|
||||
#define UNPACK_BUFFER(buffer, var, count, type, error_msg) \
|
||||
{ \
|
||||
orte_std_cntr_t n = count; \
|
||||
orca_std_cntr_t n = count; \
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.unpack(buffer, &(var), &n, type)) ) { \
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle, \
|
||||
"%s (Return %d)", error_msg, ret); \
|
||||
@ -1487,7 +1484,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_procs(
|
||||
if(NULL == item) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: del_procs: Unable to find peer %s\n",
|
||||
ORTE_NAME_PRINT(&(procs[i]->proc_name)));
|
||||
ORCA_NAME_PRINT(&(procs[i]->proc_name)));
|
||||
exit_status = OMPI_ERROR;
|
||||
goto DONE;
|
||||
}
|
||||
@ -3036,7 +3033,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
|
||||
|
||||
if( opal_cr_timing_barrier_enabled ) {
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR0);
|
||||
orte_grpcomm.barrier();
|
||||
orca_coll_barrier(ORCA_COLL_TYPE_BARRIER_CR);
|
||||
}
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP0);
|
||||
|
||||
@ -3051,7 +3048,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
|
||||
if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_pml_quiesce_start(QUIESCE_TAG_CKPT)) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s ft_event: Checkpoint Coordination Failed %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto DONE;
|
||||
@ -3081,7 +3078,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
|
||||
first_continue_pass = !first_continue_pass;
|
||||
|
||||
/* Only finalize the Protocol after the PML has been rebuilt */
|
||||
if( orte_cr_continue_like_restart && first_continue_pass ) {
|
||||
if( orca_info_cr_continue_like_restart() && first_continue_pass ) {
|
||||
goto DONE;
|
||||
}
|
||||
|
||||
@ -3104,7 +3101,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
|
||||
|
||||
if( opal_cr_timing_barrier_enabled ) {
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR1);
|
||||
orte_grpcomm.barrier();
|
||||
orca_coll_barrier(ORCA_COLL_TYPE_BARRIER_CR);
|
||||
}
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE2);
|
||||
}
|
||||
@ -3127,7 +3124,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
|
||||
cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item;
|
||||
|
||||
/* JJH - Assuming only one global jobid at the moment */
|
||||
cur_peer_ref->proc_name.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
cur_peer_ref->proc_name.jobid = orca_process_info_get_jobid(ORCA_PROC_MY_NAME);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3230,7 +3227,7 @@ static int traffic_message_append(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
} else {
|
||||
CREATE_NEW_MSG((*msg_ref), msg_type,
|
||||
count, ddt_size, tag, dest, comm,
|
||||
ORTE_JOBID_INVALID, ORTE_VPID_INVALID);
|
||||
ORCA_JOBID_INVALID, ORCA_VPID_INVALID);
|
||||
}
|
||||
|
||||
if( msg_type == COORD_MSG_TYPE_P_SEND ||
|
||||
@ -3395,7 +3392,7 @@ static int traffic_message_find_mark_persistent(ompi_crcp_bkmrk_pml_traffic_mess
|
||||
else if( loc_breq->req_sequence == breq->req_sequence ) {
|
||||
OPAL_OUTPUT_VERBOSE((25, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"%s %8s Request [%d] (%s) %d : %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
(set_is_active ? "Start" : (NULL != c_ref ? "Drain" : "Complete")),
|
||||
(int)msg_ref->msg_id,
|
||||
(content_ref->active ? "T" : "F"),
|
||||
@ -3496,8 +3493,8 @@ static int traffic_message_create_drain_message(bool post_drain,
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
" --> Create Drain Msg: %s %4d = min(%4d / %4d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
(post_drain ? "Posting" : "Not Posting"),
|
||||
m_total, (*posted_msg_ref)->active, max_post ));
|
||||
|
||||
@ -3524,8 +3521,8 @@ static int traffic_message_create_drain_message(bool post_drain,
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
" \t--> Find Content: %s (%4d of %4d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
(post_drain ? "Posting" : "Not Posting"),
|
||||
m_iter, m_total));
|
||||
|
||||
@ -3602,8 +3599,8 @@ static int traffic_message_create_drain_message(bool post_drain,
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Added %d messages to the drained list (size = %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
(*num_posted),
|
||||
(int)opal_list_get_size(&(peer_ref->drained_list)) ));
|
||||
|
||||
@ -3920,7 +3917,7 @@ static int drain_message_find_any(size_t count, int tag, int peer,
|
||||
continue;
|
||||
}
|
||||
|
||||
if( OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
if( OPAL_EQUAL != orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&(cur_peer_ref->proc_name),
|
||||
&(comm->c_local_group->grp_proc_pointers[peer]->proc_name)) ) {
|
||||
continue;
|
||||
@ -4139,10 +4136,10 @@ static int drain_message_copy_remove(ompi_crcp_bkmrk_pml_drain_message_ref_t *dr
|
||||
/************************************************
|
||||
* Peer List Utility Functions
|
||||
************************************************/
|
||||
static ompi_crcp_bkmrk_pml_peer_ref_t * find_peer(orte_process_name_t proc)
|
||||
static ompi_crcp_bkmrk_pml_peer_ref_t * find_peer(orca_process_name_t proc)
|
||||
{
|
||||
opal_list_item_t* item = NULL;
|
||||
orte_ns_cmp_bitmask_t mask;
|
||||
orca_name_cmp_bitmask_t mask;
|
||||
|
||||
for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs);
|
||||
item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs);
|
||||
@ -4150,9 +4147,9 @@ static ompi_crcp_bkmrk_pml_peer_ref_t * find_peer(orte_process_name_t proc)
|
||||
ompi_crcp_bkmrk_pml_peer_ref_t *cur_peer_ref;
|
||||
cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item;
|
||||
|
||||
mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID;
|
||||
mask = ORCA_NAME_CMP_JOBID | ORCA_NAME_CMP_VPID;
|
||||
|
||||
if( OPAL_EQUAL == orte_util_compare_name_fields(mask,
|
||||
if( OPAL_EQUAL == orca_process_name_compare(mask,
|
||||
&(cur_peer_ref->proc_name),
|
||||
&proc) ) {
|
||||
return cur_peer_ref;
|
||||
@ -4254,7 +4251,7 @@ static int ft_event_coordinate_peers(void)
|
||||
if( stall_for_completion ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s **** STALLING %s in PID %d ***",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
(current_msg_type == COORD_MSG_TYPE_B_SEND ? "Send" : "Recv"),
|
||||
getpid() ));
|
||||
step_to_return_to = 1;
|
||||
@ -4281,7 +4278,7 @@ static int ft_event_coordinate_peers(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Coordination Finished...\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME)));
|
||||
|
||||
/*
|
||||
* Now that all our peer channels are marked as drained
|
||||
@ -4313,8 +4310,8 @@ static int ft_event_finalize_exchange(void)
|
||||
ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref;
|
||||
peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item;
|
||||
|
||||
if( OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
(ORTE_PROC_MY_NAME),
|
||||
if( OPAL_EQUAL != orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
(ORCA_PROC_MY_NAME),
|
||||
&(peer_ref->proc_name)) ) {
|
||||
TRAFFIC_MSG_DUMP_PEER(10, (peer_ref, "finalize_exchange", false));
|
||||
}
|
||||
@ -4421,10 +4418,11 @@ static int ft_event_finalize_exchange(void)
|
||||
static int ft_event_exchange_bookmarks(void)
|
||||
{
|
||||
int peer_idx = 0;
|
||||
int my_idx = ORTE_PROC_MY_NAME->vpid;
|
||||
int my_idx;
|
||||
int iter = 0;
|
||||
int num_peers = 0;
|
||||
|
||||
|
||||
my_idx = orca_process_info_get_vpid(ORCA_PROC_MY_NAME);
|
||||
num_peers = opal_list_get_size(&ompi_crcp_bkmrk_pml_peer_refs);
|
||||
|
||||
for( peer_idx = (num_peers - my_idx - 1), iter = 0;
|
||||
@ -4464,15 +4462,15 @@ static int ft_event_check_bookmarks(void)
|
||||
int p_n_from_p_m = 0;
|
||||
|
||||
if( 10 <= mca_crcp_bkmrk_component.super.verbose ) {
|
||||
sleep(ORTE_PROC_MY_NAME->vpid);
|
||||
sleep(orca_process_info_get_vpid(ORCA_PROC_MY_NAME));
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"---------------------------------------------"));
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"Process %s Match Table",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME)));
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"%s %5s | %7s | %7s | %7s | %7s |",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
"Vpid", "T_Send", "M_Recv", "M_Send", "T_Recv"));
|
||||
|
||||
for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs);
|
||||
@ -4490,7 +4488,7 @@ static int ft_event_check_bookmarks(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"%s %5d | %7d | %7d | %7d | %7d |",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
peer_ref->proc_name.vpid,
|
||||
t_send, m_recv, m_send, t_recv));
|
||||
}
|
||||
@ -4509,8 +4507,8 @@ static int ft_event_check_bookmarks(void)
|
||||
ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref;
|
||||
peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item;
|
||||
|
||||
if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
(ORTE_PROC_MY_NAME),
|
||||
if( OPAL_EQUAL == orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
(ORCA_PROC_MY_NAME),
|
||||
&(peer_ref->proc_name)) ) {
|
||||
continue;
|
||||
}
|
||||
@ -4518,7 +4516,7 @@ static int ft_event_check_bookmarks(void)
|
||||
TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Bookmark Details --", false));
|
||||
|
||||
/* Lowest Rank sends first */
|
||||
if( ORTE_PROC_MY_NAME->vpid < peer_ref->proc_name.vpid ) {
|
||||
if( orca_process_info_get_vpid(ORCA_PROC_MY_NAME) < peer_ref->proc_name.vpid ) {
|
||||
/********************
|
||||
* Check P_n --> P_m
|
||||
* Has the peer received all the messages that I have put on the wire?
|
||||
@ -4532,8 +4530,8 @@ static int ft_event_check_bookmarks(void)
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Total Sent (%4d) = Matched Recv. (%4d) => Diff (%4d). "
|
||||
" WARNING: Peer received more than was sent. :(\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4546,8 +4544,8 @@ static int ft_event_check_bookmarks(void)
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Total Sent (%4d) = Matched Recv. (%4d). Peer needs %4d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4560,7 +4558,7 @@ static int ft_event_check_bookmarks(void)
|
||||
if( OMPI_SUCCESS != (ret = send_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: check_bookmarks: Unable to send message details to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
ORCA_NAME_PRINT(&peer_ref->proc_name),
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
@ -4579,8 +4577,8 @@ static int ft_event_check_bookmarks(void)
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Matched Sent (%4d) = Total Recv. (%4d) => Diff (%4d). "
|
||||
" WARNING: I received more than the peer sent. :(\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4593,8 +4591,8 @@ static int ft_event_check_bookmarks(void)
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Matched Sent (%4d) = Total Recv. (%4d). I need %4d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4606,7 +4604,7 @@ static int ft_event_check_bookmarks(void)
|
||||
if( OMPI_SUCCESS != (ret = recv_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: check_bookmarks: Unable to recv message details from peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
ORCA_NAME_PRINT(&peer_ref->proc_name),
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
@ -4627,8 +4625,8 @@ static int ft_event_check_bookmarks(void)
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Matched Sent (%4d) = Total Recv. (%4d) => Diff (%4d). "
|
||||
" WARNING: I received more than the peer sent. :(\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4641,8 +4639,8 @@ static int ft_event_check_bookmarks(void)
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Matched Sent (%4d) = Total Recv. (%4d). I need %4d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4654,7 +4652,7 @@ static int ft_event_check_bookmarks(void)
|
||||
if( OMPI_SUCCESS != (ret = recv_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: check_bookmarks: Unable to recv message details from peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
ORCA_NAME_PRINT(&peer_ref->proc_name),
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
@ -4673,8 +4671,8 @@ static int ft_event_check_bookmarks(void)
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Total Sent (%4d) = Matched Recv. (%4d) => Diff (%4d). "
|
||||
" WARNING: Peer received more than was sent. :(\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4687,8 +4685,8 @@ static int ft_event_check_bookmarks(void)
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Total Sent (%4d) = Matched Recv. (%4d). Peer needs %4d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4701,7 +4699,7 @@ static int ft_event_check_bookmarks(void)
|
||||
if( OMPI_SUCCESS != (ret = send_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: check_bookmarks: Unable to send message details to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
ORCA_NAME_PRINT(&peer_ref->proc_name),
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
@ -4726,7 +4724,7 @@ static int ft_event_post_drain_acks(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Wait on %d Drain ACK Messages.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
(int)req_size));
|
||||
|
||||
/*
|
||||
@ -4739,15 +4737,15 @@ static int ft_event_post_drain_acks(void)
|
||||
drain_msg_ack = (ompi_crcp_bkmrk_pml_drain_message_ack_ref_t*)item;
|
||||
|
||||
/* Post the receive */
|
||||
if( ORTE_SUCCESS != (ret = orte_rml.recv_buffer_nb( &drain_msg_ack->peer,
|
||||
if( ORCA_SUCCESS != (ret = orca_oob_recv_buffer_nb( &drain_msg_ack->peer,
|
||||
OMPI_CRCP_COORD_BOOKMARK_TAG,
|
||||
0,
|
||||
drain_message_ack_cbfunc,
|
||||
NULL) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s: Failed to post a RML receive to the peer\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(drain_msg_ack->peer)));
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(drain_msg_ack->peer)));
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -4756,9 +4754,9 @@ static int ft_event_post_drain_acks(void)
|
||||
}
|
||||
|
||||
static void drain_message_ack_cbfunc(int status,
|
||||
orte_process_name_t* sender,
|
||||
orca_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orca_oob_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
int ret, exit_status = OMPI_SUCCESS;
|
||||
@ -4782,15 +4780,15 @@ static void drain_message_ack_cbfunc(int status,
|
||||
/* If this ACK has not completed yet */
|
||||
if(!drain_msg_ack->complete) {
|
||||
/* If it is the correct peer */
|
||||
if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
if( OPAL_EQUAL == orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
&(drain_msg_ack->peer),
|
||||
sender) ) {
|
||||
/* We found it! */
|
||||
drain_msg_ack->complete = true;
|
||||
OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s Received ACK of FLUSH from peer\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender) ));
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(sender) ));
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -4798,8 +4796,8 @@ static void drain_message_ack_cbfunc(int status,
|
||||
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s ERROR: Unable to match ACK to peer\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender) );
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(sender) );
|
||||
|
||||
cleanup:
|
||||
return;
|
||||
@ -4820,8 +4818,8 @@ static int ft_event_post_drained(void)
|
||||
item = opal_list_get_next(item) ) {
|
||||
cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item;
|
||||
|
||||
if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
(ORTE_PROC_MY_NAME),
|
||||
if( OPAL_EQUAL == orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
(ORCA_PROC_MY_NAME),
|
||||
&(cur_peer_ref->proc_name)) ) {
|
||||
continue;
|
||||
}
|
||||
@ -4885,8 +4883,8 @@ static int ft_event_post_drained(void)
|
||||
cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item;
|
||||
peer_total = 0;
|
||||
|
||||
if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
(ORTE_PROC_MY_NAME),
|
||||
if( OPAL_EQUAL == orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
(ORCA_PROC_MY_NAME),
|
||||
&(cur_peer_ref->proc_name)) ) {
|
||||
continue;
|
||||
}
|
||||
@ -4932,8 +4930,8 @@ static int ft_event_post_drained(void)
|
||||
if( peer_total > 0 || stall_for_completion ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Will be draining %4d messages from this peer. Total %4d %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(cur_peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(cur_peer_ref->proc_name)),
|
||||
peer_total,
|
||||
quiesce_request_count,
|
||||
(stall_for_completion ? "(And Stalling)" : "") ));
|
||||
@ -4963,8 +4961,8 @@ static int ft_event_post_drain_message(ompi_crcp_bkmrk_pml_drain_message_ref_t
|
||||
if( content_ref->already_posted ) {
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Found a message that we do not need to post.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(drain_msg_ref->proc_name)) ));
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(drain_msg_ref->proc_name)) ));
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -4977,8 +4975,8 @@ static int ft_event_post_drain_message(ompi_crcp_bkmrk_pml_drain_message_ref_t
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((20, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Posting a message to be drained from rank %d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(drain_msg_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(drain_msg_ref->proc_name)),
|
||||
drain_msg_ref->rank));
|
||||
if( OMPI_SUCCESS != (ret = wrapped_pml_module->pml_irecv(content_ref->buffer,
|
||||
(drain_msg_ref->count * drain_msg_ref->ddt_size),
|
||||
@ -4989,8 +4987,8 @@ static int ft_event_post_drain_message(ompi_crcp_bkmrk_pml_drain_message_ref_t
|
||||
&(content_ref->request) ) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Failed to post the Draining PML iRecv\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(drain_msg_ref->proc_name)) );
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(drain_msg_ref->proc_name)) );
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -5008,7 +5006,7 @@ static int ft_event_wait_quiesce(void)
|
||||
if( OMPI_SUCCESS != (ret = wait_quiesce_drained() ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: wait_quiesce: %s Failed to quiesce drained messages\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME) );
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
@ -5019,7 +5017,7 @@ static int ft_event_wait_quiesce(void)
|
||||
if( OMPI_SUCCESS != (ret = wait_quiesce_drain_ack() ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: wait_quiesce: %s Failed to recv all drain ACKs\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME) );
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
@ -5041,7 +5039,7 @@ static int wait_quiesce_drained(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Waiting on %d messages to drain\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
(int)quiesce_request_count));
|
||||
|
||||
/*
|
||||
@ -5070,8 +5068,8 @@ static int wait_quiesce_drained(void)
|
||||
item = opal_list_get_next(item) ) {
|
||||
cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item;
|
||||
|
||||
if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
(ORTE_PROC_MY_NAME),
|
||||
if( OPAL_EQUAL == orca_process_name_compare(ORCA_NAME_CMP_ALL,
|
||||
(ORCA_PROC_MY_NAME),
|
||||
&(cur_peer_ref->proc_name)) ) {
|
||||
continue;
|
||||
}
|
||||
@ -5085,8 +5083,8 @@ static int wait_quiesce_drained(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s Send ACKs to Peer\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(cur_peer_ref->proc_name)) ));
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(cur_peer_ref->proc_name)) ));
|
||||
|
||||
/* Send All Clear to Peer */
|
||||
if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
|
||||
@ -5097,7 +5095,7 @@ static int wait_quiesce_drained(void)
|
||||
PACK_BUFFER(buffer, response, 1, OPAL_SIZE, "");
|
||||
|
||||
/* JJH - Performance Optimization? - Why not post all isends, then wait? */
|
||||
if ( 0 > ( ret = orte_rml.send_buffer(&(cur_peer_ref->proc_name), buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
if ( 0 > ( ret = orca_oob_send_buffer(&(cur_peer_ref->proc_name), buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
@ -5186,7 +5184,7 @@ static int coord_request_wait_all( size_t count,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Request Wait: Done with idx %d of %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
(int)i, (int)count));
|
||||
}
|
||||
|
||||
@ -5225,7 +5223,7 @@ static int wait_quiesce_drain_ack(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Waiting on %d Drain ACK messages\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
num_outstanding));
|
||||
|
||||
while(0 < num_outstanding) {
|
||||
@ -5258,7 +5256,7 @@ static int wait_quiesce_drain_ack(void)
|
||||
static int send_bookmarks(int peer_idx)
|
||||
{
|
||||
ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref;
|
||||
orte_process_name_t peer_name;
|
||||
orca_process_name_t peer_name;
|
||||
opal_buffer_t *buffer = NULL;
|
||||
int exit_status = OMPI_SUCCESS;
|
||||
int ret;
|
||||
@ -5267,7 +5265,7 @@ static int send_bookmarks(int peer_idx)
|
||||
/*
|
||||
* Find the peer structure for this peer
|
||||
*/
|
||||
peer_name.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
peer_name.jobid = orca_process_info_get_jobid(ORCA_PROC_MY_NAME);
|
||||
peer_name.vpid = peer_idx;
|
||||
|
||||
if( NULL == (peer_ref = find_peer(peer_name))) {
|
||||
@ -5280,8 +5278,8 @@ static int send_bookmarks(int peer_idx)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s Sending bookmark (S[%6d] R[%6d])\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer_name),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&peer_name),
|
||||
peer_ref->total_msgs_sent,
|
||||
peer_ref->total_msgs_recvd));
|
||||
|
||||
@ -5298,10 +5296,10 @@ static int send_bookmarks(int peer_idx)
|
||||
PACK_BUFFER(buffer, (peer_ref->total_msgs_recvd), 1, OPAL_UINT32,
|
||||
"crcp:bkmrk: send_bookmarks: Unable to pack total_msgs_recvd");
|
||||
|
||||
if ( 0 > ( ret = orte_rml.send_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
if ( 0 > ( ret = orca_oob_send_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: send_bookmarks: Failed to send bookmark to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_name),
|
||||
ORCA_NAME_PRINT(&peer_name),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5321,23 +5319,23 @@ static int send_bookmarks(int peer_idx)
|
||||
/* Paired with send_bookmarks */
|
||||
static int recv_bookmarks(int peer_idx)
|
||||
{
|
||||
orte_process_name_t peer_name;
|
||||
orca_process_name_t peer_name;
|
||||
int exit_status = OMPI_SUCCESS;
|
||||
int ret;
|
||||
|
||||
START_TIMER(CRCP_TIMER_CKPT_EX_PEER_R);
|
||||
|
||||
peer_name.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
peer_name.jobid = orca_process_info_get_jobid(ORCA_PROC_MY_NAME);
|
||||
peer_name.vpid = peer_idx;
|
||||
|
||||
if ( 0 > (ret = orte_rml.recv_buffer_nb(&peer_name,
|
||||
if ( 0 > (ret = orca_oob_recv_buffer_nb(&peer_name,
|
||||
OMPI_CRCP_COORD_BOOKMARK_TAG,
|
||||
0,
|
||||
recv_bookmarks_cbfunc,
|
||||
NULL) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_bookmarks: Failed to post receive bookmark from peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_name),
|
||||
ORCA_NAME_PRINT(&peer_name),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5354,15 +5352,15 @@ static int recv_bookmarks(int peer_idx)
|
||||
}
|
||||
|
||||
static void recv_bookmarks_cbfunc(int status,
|
||||
orte_process_name_t* sender,
|
||||
orca_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orca_oob_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref;
|
||||
int exit_status = OMPI_SUCCESS;
|
||||
int ret, tmp_int;
|
||||
orte_vpid_t peer_idx;
|
||||
orca_vpid_t peer_idx;
|
||||
|
||||
peer_idx = sender->vpid;
|
||||
|
||||
@ -5387,8 +5385,8 @@ static void recv_bookmarks_cbfunc(int status,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Received bookmark (S[%6d] R[%6d]) vs. (S[%6d] R[%6d])\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(sender),
|
||||
peer_ref->matched_msgs_sent,
|
||||
peer_ref->matched_msgs_recvd,
|
||||
peer_ref->total_msgs_sent,
|
||||
@ -5452,8 +5450,8 @@ static int send_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
if(OMPI_SUCCESS != (ret = do_send_msg_detail(peer_ref, msg_ref, &num_matches, &p_total_found, &finished)) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: send_msg_details: %s --> %s Failed to send message details to peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
}
|
||||
|
||||
@ -5515,8 +5513,8 @@ static int send_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
opal_list_append(&drained_msg_ack_list, &(d_msg_ack->super));
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-> %s Message Inflight! Will wait on ACK from this peer.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name))));
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name))));
|
||||
|
||||
END_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_S);
|
||||
DISPLAY_INDV_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_S, peer_ref->proc_name.vpid, total_details_sent);
|
||||
@ -5589,11 +5587,11 @@ static int do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
/*
|
||||
* Do the send...
|
||||
*/
|
||||
if ( 0 > ( ret = orte_rml.send_buffer(&peer_ref->proc_name, buffer,
|
||||
if ( 0 > ( ret = orca_oob_send_buffer(&peer_ref->proc_name, buffer,
|
||||
OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: do_send_msg_detail: Unable to send message details to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
ORCA_NAME_PRINT(&peer_ref->proc_name),
|
||||
ret);
|
||||
|
||||
exit_status = OMPI_ERROR;
|
||||
@ -5616,12 +5614,12 @@ static int do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
/*
|
||||
* Recv the ACK msg
|
||||
*/
|
||||
if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer,
|
||||
if ( 0 > (ret = orca_oob_recv_buffer(&peer_ref->proc_name, buffer,
|
||||
OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: do_send_msg_detail: %s --> %s Failed to receive ACK buffer from peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5707,8 +5705,8 @@ static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_details: %s <-- %s "
|
||||
"Failed to receive message detail from peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5728,8 +5726,8 @@ static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_details: %s <-- %s "
|
||||
"Failed to check message detail from peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5740,8 +5738,8 @@ static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Recv Detail: Stage --: [%3d / %3d] [%3d, %3d, %s]",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
need, found,
|
||||
num_resolved, total_details_recv,
|
||||
( need <= found ? "T" : "F") ));
|
||||
@ -5758,8 +5756,8 @@ static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
if(OMPI_SUCCESS != (ret = do_recv_msg_detail_resp(peer_ref, response, num_resolved, found))) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_details: %s <-- %s Failed to respond to peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5790,11 +5788,11 @@ static int do_recv_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
/*
|
||||
* Recv the msg
|
||||
*/
|
||||
if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
|
||||
if ( 0 > (ret = orca_oob_recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: do_recv_msg_detail: %s <-- %s Failed to receive buffer from peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5853,8 +5851,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 0: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -5876,8 +5874,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_detail_check: %s -- %s "
|
||||
"Failed to determine if we have received this message. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5926,8 +5924,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 1: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -6011,8 +6009,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 2: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d, active %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -6060,8 +6058,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Recv Check: Found a message that is 'active'! Prepare to STALL.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)) ));
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)) ));
|
||||
stall_for_completion = true;
|
||||
}
|
||||
else {
|
||||
@ -6069,8 +6067,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Recv Check: Found a message that is 'active', but is not the current recv! "
|
||||
"No stall required [%3d, %3d, %3d, %3d].\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
(int)current_msg_id,
|
||||
(int)current_msg_type,
|
||||
(int)posted_recv_msg_ref->msg_id,
|
||||
@ -6128,8 +6126,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 3: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d, active %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -6165,8 +6163,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 4: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d, active %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -6199,10 +6197,10 @@ static int do_recv_msg_detail_resp(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
PACK_BUFFER(buffer, total_found, 1, OPAL_UINT32,
|
||||
"crcp:bkmrk: recv_msg_details: Unable to ask peer for more messages");
|
||||
|
||||
if ( 0 > ( ret = orte_rml.send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
if ( 0 > ( ret = orca_oob_send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_detail_resp: Unable to send message detail response to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
ORCA_NAME_PRINT(&peer_ref->proc_name),
|
||||
ret);
|
||||
exit_status = OMPI_ERROR;
|
||||
goto cleanup;
|
||||
@ -6261,12 +6259,12 @@ static void display_all_timers(int state) {
|
||||
double barrier_start, barrier_stop;
|
||||
int i;
|
||||
|
||||
if( 0 != ORTE_PROC_MY_NAME->vpid ) {
|
||||
if( 0 != orca_process_info_get_vpid(ORCA_PROC_MY_NAME) ) {
|
||||
if( 2 > timing_enabled ) {
|
||||
return;
|
||||
}
|
||||
else if( 2 == timing_enabled ) {
|
||||
orte_grpcomm.barrier();
|
||||
orca_coll_barrier(ORCA_COLL_TYPE_BARRIER_CR);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -6287,7 +6285,7 @@ static void display_all_timers(int state) {
|
||||
|
||||
if( timing_enabled >= 2) {
|
||||
barrier_start = get_time();
|
||||
orte_grpcomm.barrier();
|
||||
orca_coll_barrier(ORCA_COLL_TYPE_BARRIER_CR);
|
||||
barrier_stop = get_time();
|
||||
opal_output(0,
|
||||
"crcp:bkmrk: timing(%20s): %20s = %10.2f s\n",
|
||||
@ -6307,7 +6305,7 @@ static void display_indv_timer_core(int idx, int proc, int msgs, bool direct) {
|
||||
double diff = timer_end[idx] - timer_start[idx];
|
||||
char * str = NULL;
|
||||
|
||||
if( 0 != ORTE_PROC_MY_NAME->vpid && timing_enabled < 3 ) {
|
||||
if( 0 != orca_process_info_get_vpid(ORCA_PROC_MY_NAME) && timing_enabled < 3 ) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -6509,16 +6507,16 @@ static void traffic_message_dump_msg_list(opal_list_t *msg_list, bool is_drain)
|
||||
|
||||
static void traffic_message_dump_peer(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, char * msg, bool root_only)
|
||||
{
|
||||
if( root_only && orte_process_info.my_name.vpid != 0 ) {
|
||||
if( root_only && orca_process_info_get_vpid(ORCA_PROC_MY_NAME) != 0 ) {
|
||||
return;
|
||||
} else {
|
||||
sleep(orte_process_info.my_name.vpid * 2);
|
||||
sleep(orca_process_info_get_vpid(ORCA_PROC_MY_NAME) * 2);
|
||||
}
|
||||
|
||||
opal_output(0, "------------- %s ---------------------------------", msg);
|
||||
opal_output(0, "%s <-> %s Totals Sent [ %3d / %3d ] Recv [ %3d / %3d ]",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_sent,
|
||||
peer_ref->matched_msgs_sent,
|
||||
peer_ref->total_msgs_recvd,
|
||||
|
@ -7,7 +7,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -234,7 +234,7 @@ BEGIN_C_DECLS
|
||||
opal_list_t msg_contents;
|
||||
|
||||
/** Peer which we received from */
|
||||
orte_process_name_t proc_name;
|
||||
orca_process_name_t proc_name;
|
||||
|
||||
/**
|
||||
* Count of the number of completed PML messages that match this reference.
|
||||
@ -271,7 +271,7 @@ BEGIN_C_DECLS
|
||||
bool complete;
|
||||
|
||||
/** Peer which we received from */
|
||||
orte_process_name_t peer;
|
||||
orca_process_name_t peer;
|
||||
};
|
||||
typedef struct ompi_crcp_bkmrk_pml_drain_message_ack_ref_t ompi_crcp_bkmrk_pml_drain_message_ack_ref_t;
|
||||
|
||||
@ -312,7 +312,7 @@ BEGIN_C_DECLS
|
||||
opal_list_t msg_contents;
|
||||
|
||||
/** Peer which we received from */
|
||||
orte_process_name_t proc_name;
|
||||
orca_process_name_t proc_name;
|
||||
|
||||
/* Sample movement of values (mirrored for send):
|
||||
* Recv() iRecv() irecv_init() start() req_complete()
|
||||
@ -379,7 +379,7 @@ BEGIN_C_DECLS
|
||||
opal_list_item_t super;
|
||||
|
||||
/** Name of peer */
|
||||
orte_process_name_t proc_name;
|
||||
orca_process_name_t proc_name;
|
||||
|
||||
/** List of messages sent to this peer */
|
||||
opal_list_t send_list; /**< pml_send */
|
||||
|
@ -75,7 +75,7 @@ error.
|
||||
.SS none CRCP Component
|
||||
.PP
|
||||
The \fInone\fP component simply selects no CRCP component. All of the CRCP
|
||||
function calls return immediately with ORTE_SUCCESS.
|
||||
function calls return immediately with OMPI_SUCCESS.
|
||||
.
|
||||
.PP
|
||||
This component is the last component to be selected by default. This means that if
|
||||
|
@ -9,6 +9,7 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -27,7 +28,7 @@ libmca_dpm_la_SOURCES += \
|
||||
base/dpm_base_select.c \
|
||||
base/dpm_base_null_fns.c
|
||||
|
||||
if !ORTE_DISABLE_FULL_SUPPORT
|
||||
if ORCA_WITH_FULL_ORTE_SUPPORT
|
||||
libmca_dpm_la_SOURCES += \
|
||||
base/dpm_base_common_fns.c
|
||||
endif
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -67,7 +68,7 @@ OMPI_DECLSPEC int ompi_dpm_base_select(void);
|
||||
*/
|
||||
OMPI_DECLSPEC int ompi_dpm_base_close(void);
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
#if ORCA_WITH_FULL_ORTE_SUPPORT
|
||||
|
||||
/* Internal support functions */
|
||||
OMPI_DECLSPEC char* ompi_dpm_base_dyn_init (void);
|
||||
@ -91,10 +92,10 @@ int ompi_dpm_base_null_spawn(int count, char **array_of_commands,
|
||||
int ompi_dpm_base_null_dyn_init(void);
|
||||
int ompi_dpm_base_null_dyn_finalize (void);
|
||||
void ompi_dpm_base_null_mark_dyncomm (ompi_communicator_t *comm);
|
||||
int ompi_dpm_base_null_open_port(char *port_name, orte_rml_tag_t given_tag);
|
||||
int ompi_dpm_base_null_open_port(char *port_name, orca_oob_tag_t given_tag);
|
||||
int ompi_dpm_base_null_parse_port(char *port_name,
|
||||
char **hnp_uri, char **rml_uri, orte_rml_tag_t *tag);
|
||||
int ompi_dpm_base_null_route_to_port(char *rml_uri, orte_process_name_t *rproc);
|
||||
char **hnp_uri, char **rml_uri, orca_oob_tag_t *tag);
|
||||
int ompi_dpm_base_null_route_to_port(char *rml_uri, orca_process_name_t *rproc);
|
||||
int ompi_dpm_base_null_close_port(char *port_name);
|
||||
|
||||
/* useful globals */
|
||||
|
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -237,7 +237,7 @@ void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm)
|
||||
int i;
|
||||
int size, rsize;
|
||||
bool found=false;
|
||||
orte_jobid_t thisjobid;
|
||||
orca_jobid_t thisjobid;
|
||||
ompi_group_t *grp=NULL;
|
||||
ompi_proc_t *proc = NULL;
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -66,18 +66,18 @@ void ompi_dpm_base_null_mark_dyncomm (ompi_communicator_t *comm)
|
||||
return;
|
||||
}
|
||||
|
||||
int ompi_dpm_base_null_open_port(char *port_name, orte_rml_tag_t given_tag)
|
||||
int ompi_dpm_base_null_open_port(char *port_name, orca_oob_tag_t given_tag)
|
||||
{
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
int ompi_dpm_base_null_parse_port(char *port_name,
|
||||
char **hnp_uri, char **rml_uri, orte_rml_tag_t *tag)
|
||||
char **hnp_uri, char **rml_uri, orca_oob_tag_t *tag)
|
||||
{
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
int ompi_dpm_base_null_route_to_port(char *rml_uri, orte_process_name_t *rproc)
|
||||
int ompi_dpm_base_null_route_to_port(char *rml_uri, orca_process_name_t *rproc)
|
||||
{
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -30,7 +31,7 @@
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "ompi/info/info.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
@ -38,9 +39,9 @@ BEGIN_C_DECLS
|
||||
|
||||
/* OMPI port definitions */
|
||||
/* carry over the INVALID def */
|
||||
#define OMPI_RML_TAG_INVALID ORTE_RML_TAG_INVALID
|
||||
#define OMPI_RML_TAG_INVALID ORCA_OOB_TAG_INVALID
|
||||
/* define a starting point to avoid conflicts */
|
||||
#define OMPI_RML_TAG_BASE ORTE_RML_TAG_MAX
|
||||
#define OMPI_RML_TAG_BASE ORCA_OOB_TAG_MAX
|
||||
|
||||
#define OMPI_RML_TAG_UDAPL OMPI_RML_TAG_BASE+1
|
||||
#define OMPI_RML_TAG_OPENIB OMPI_RML_TAG_BASE+2
|
||||
@ -116,20 +117,20 @@ typedef void (*ompi_dpm_base_module_mark_dyncomm_fn_t)(ompi_communicator_t *comm
|
||||
* specified tag is valid, then it will be used to form the port. Otherwise,
|
||||
* a dynamically assigned tag that is unique to this request will be provided
|
||||
*/
|
||||
typedef int (*ompi_dpm_base_module_open_port_fn_t)(char *port_name, orte_rml_tag_t tag);
|
||||
typedef int (*ompi_dpm_base_module_open_port_fn_t)(char *port_name, orca_oob_tag_t tag);
|
||||
|
||||
/*
|
||||
* Converts an opaque port string to an RML process name and tag.
|
||||
*/
|
||||
typedef int (*ompi_dpm_base_module_parse_port_name_t)(char *port_name,
|
||||
char **hnp_uri, char **rml_uri,
|
||||
orte_rml_tag_t *tag);
|
||||
orca_oob_tag_t *tag);
|
||||
|
||||
/*
|
||||
* Update the routed component to make sure that the RML can send messages to
|
||||
* the remote port
|
||||
*/
|
||||
typedef int (*ompi_dpm_base_module_route_to_port_t)(char *rml_uri, orte_process_name_t *rproc);
|
||||
typedef int (*ompi_dpm_base_module_route_to_port_t)(char *rml_uri, orca_process_name_t *rproc);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -4,6 +4,7 @@
|
||||
# Copyright (c) 2009 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -14,6 +15,9 @@
|
||||
|
||||
EXTRA_DIST = .windows
|
||||
|
||||
CFLAGS = $(dpm_orte_CFLAGS)
|
||||
AM_CPPFLAGS = $(dpm_orte_CPPFLAGS)
|
||||
|
||||
dist_pkgdata_DATA = help-ompi-dpm-orte.txt
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
|
@ -2,7 +2,7 @@
|
||||
#
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -15,7 +15,15 @@
|
||||
AC_DEFUN([MCA_ompi_dpm_orte_CONFIG],[
|
||||
AC_CONFIG_FILES([ompi/mca/dpm/orte/Makefile])
|
||||
|
||||
AS_IF([test "$orte_without_full_support" = 0],
|
||||
[$1],
|
||||
ORCA_CHECK_ORTE([dpm_orte],
|
||||
[dpm_orte_happy="yes"],
|
||||
[dpm_orte_happy="no"])
|
||||
|
||||
AS_IF([test "$dpm_orte_happy" = "yes" -a "$orca_without_orte_full_support" = 0],
|
||||
[AC_SUBST([dpm_orte_CFLAGS])
|
||||
AC_SUBST([dpm_orte_CPPFLAGS])
|
||||
AC_SUBST([dpm_orte_LDFLAGS])
|
||||
AC_SUBST([dpm_orte_LIBS])
|
||||
$1],
|
||||
[$2])
|
||||
])dnl
|
||||
|
@ -10,6 +10,7 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -27,6 +28,10 @@ AM_CPPFLAGS = \
|
||||
-I$(top_ompi_builddir)/orte/include \
|
||||
-I$(top_ompi_srcdir)/orte \
|
||||
-I$(top_ompi_srcdir)/orte/include \
|
||||
-I$(top_ompi_builddir)/orca \
|
||||
-I$(top_ompi_builddir)/orca/include \
|
||||
-I$(top_ompi_srcdir)/orca \
|
||||
-I$(top_ompi_srcdir)/orca/include \
|
||||
-I$(top_ompi_builddir)/ompi \
|
||||
-I$(top_ompi_builddir)/ompi/include \
|
||||
-I$(top_ompi_srcdir)/ompi \
|
||||
|
@ -12,6 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -30,10 +31,9 @@
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
@ -115,9 +115,9 @@ mca_mpool_base_module_t* mca_mpool_base_module_create(
|
||||
opal_mem_hooks_support_level())) {
|
||||
opal_mem_hooks_register_release(mca_mpool_base_mem_cb, NULL);
|
||||
} else {
|
||||
orte_show_help("help-mpool-base.txt", "leave pinned failed",
|
||||
true, name, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_process_info.nodename);
|
||||
orca_show_help("help-mpool-base.txt", "leave pinned failed",
|
||||
true, name, ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
orca_process_info_get_nodename() );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,8 +28,7 @@
|
||||
#endif
|
||||
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/mca/mpool/base/mpool_base_mem_cb.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
@ -72,17 +72,17 @@ void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata,
|
||||
if (from_alloc) {
|
||||
int len;
|
||||
len = snprintf(msg, sizeof(msg), "[%s:%d] Attempt to free memory that is still in use by an ongoing MPI communication (buffer %p, size %lu). MPI job will now abort.\n",
|
||||
orte_process_info.nodename,
|
||||
getpid(),
|
||||
base, (unsigned long) size);
|
||||
orca_process_info_get_nodename(),
|
||||
getpid(),
|
||||
base, (unsigned long) size);
|
||||
msg[sizeof(msg) - 1] = '\0';
|
||||
write(2, msg, len);
|
||||
} else {
|
||||
orte_show_help("help-mpool-base.txt",
|
||||
"cannot deregister in-use memory", true,
|
||||
current->mpool_component->mpool_version.mca_component_name,
|
||||
orte_process_info.nodename,
|
||||
base, (unsigned long) size);
|
||||
orca_show_help("help-mpool-base.txt",
|
||||
"cannot deregister in-use memory", true,
|
||||
current->mpool_component->mpool_version.mca_component_name,
|
||||
orca_process_info_get_nodename(),
|
||||
base, (unsigned long) size);
|
||||
}
|
||||
|
||||
/* We're in a callback from somewhere; we can't do
|
||||
|
@ -12,6 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -26,10 +27,7 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/class/ompi_rb_tree.h"
|
||||
@ -178,18 +176,18 @@ void mca_mpool_base_tree_print(void)
|
||||
|
||||
if (num_leaks <= ompi_debug_show_mpi_alloc_mem_leaks ||
|
||||
ompi_debug_show_mpi_alloc_mem_leaks < 0) {
|
||||
orte_show_help("help-mpool-base.txt", "all mem leaks",
|
||||
true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_process_info.nodename,
|
||||
orte_process_info.pid, leak_msg);
|
||||
orca_show_help("help-mpool-base.txt", "all mem leaks",
|
||||
true, ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
orca_process_info_get_nodename(),
|
||||
orca_process_info_get_pid(), leak_msg);
|
||||
} else {
|
||||
int i = num_leaks - ompi_debug_show_mpi_alloc_mem_leaks;
|
||||
orte_show_help("help-mpool-base.txt", "some mem leaks",
|
||||
true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_process_info.nodename,
|
||||
orte_process_info.pid, leak_msg, i,
|
||||
(i > 1) ? "s were" : " was",
|
||||
(i > 1) ? "are" : "is");
|
||||
orca_show_help("help-mpool-base.txt", "some mem leaks",
|
||||
true, ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
orca_process_info_get_nodename(),
|
||||
orca_process_info_get_pid(), leak_msg, i,
|
||||
(i > 1) ? "s were" : " was",
|
||||
(i > 1) ? "are" : "is");
|
||||
}
|
||||
free(leak_msg);
|
||||
leak_msg = NULL;
|
||||
|
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -26,7 +26,7 @@
|
||||
#include "ompi_config.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "mpool_grdma.h"
|
||||
#ifdef HAVE_UNISTD_H
|
||||
|
@ -16,7 +16,7 @@
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,8 +27,7 @@
|
||||
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
|
||||
#include "ompi_config.h"
|
||||
#include "opal/align.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "mpool_grdma.h"
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
@ -452,7 +451,7 @@ void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool)
|
||||
if (true == mca_mpool_grdma_component.print_stats) {
|
||||
opal_output(0, "%s grdma: stats "
|
||||
"(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
mpool_grdma->stat_cache_hit, mpool_grdma->stat_cache_miss,
|
||||
mpool_grdma->stat_cache_found, mpool_grdma->stat_cache_notfound,
|
||||
mpool_grdma->stat_evicted);
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -77,8 +78,7 @@
|
||||
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
|
||||
#include "ompi_config.h"
|
||||
#include "opal/align.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "ompi/mca/mpool/rgpusm/mpool_rgpusm.h"
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
@ -556,7 +556,7 @@ void mca_mpool_rgpusm_finalize(struct mca_mpool_base_module_t *mpool)
|
||||
if(true == mca_mpool_rgpusm_component.print_stats) {
|
||||
opal_output(0, "%s rgpusm: stats "
|
||||
"(hit/valid/invalid/miss/evicted): %d/%d/%d/%d/%d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORCA_NAME_PRINT(ORCA_PROC_MY_NAME),
|
||||
mpool_rgpusm->stat_cache_hit, mpool_rgpusm->stat_cache_valid,
|
||||
mpool_rgpusm->stat_cache_invalid, mpool_rgpusm->stat_cache_miss,
|
||||
mpool_rgpusm->stat_evicted);
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -31,7 +32,7 @@
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/mca/allocator/base/base.h"
|
||||
#include "mpool_sm.h"
|
||||
@ -199,8 +200,8 @@ static mca_mpool_base_module_t* mca_mpool_sm_init(
|
||||
|
||||
/* create initial shared memory mapping */
|
||||
len = asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
|
||||
orte_process_info.job_session_dir,
|
||||
orte_process_info.nodename );
|
||||
orca_process_info_get_job_session_dir(),
|
||||
orca_process_info_get_nodename() );
|
||||
if ( 0 > len ) {
|
||||
free(mpool_module);
|
||||
free(procs);
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -29,7 +30,7 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
#include "orte/mca/sstore/sstore.h"
|
||||
@ -175,14 +176,14 @@ int mca_mpool_sm_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
/* Record the shared memory filename */
|
||||
asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
|
||||
orte_process_info.job_session_dir,
|
||||
orte_process_info.nodename );
|
||||
orca_process_info_get_job_session_dir(),
|
||||
orca_process_info_get_nodename() );
|
||||
orte_sstore.set_attr(orte_sstore_handle_current, SSTORE_METADATA_LOCAL_TOUCH, file_name);
|
||||
free(file_name);
|
||||
file_name = NULL;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
if(orte_cr_continue_like_restart) {
|
||||
if(orca_info_cr_continue_like_restart()) {
|
||||
/* Find the sm module */
|
||||
self_module = mca_mpool_base_module_lookup("sm");
|
||||
self_sm_module = (mca_mpool_sm_module_t*) self_module;
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -10,14 +11,15 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
@ -62,9 +64,9 @@ static uint32_t ompi_mtl_mxm_get_job_id(void)
|
||||
memset(uu, 0, sizeof(unique_job_key));
|
||||
|
||||
if (!generated_key || (strlen(generated_key) != 33) || sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) {
|
||||
orte_show_help("help-mtl-mxm.txt", "no uuid present", true,
|
||||
orca_show_help("help-mtl-mxm.txt", "no uuid present", true,
|
||||
generated_key ? "could not be parsed from" :
|
||||
"not present in", orte_process_info.nodename);
|
||||
"not present in", orca_process_info_get_nodename());
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -92,7 +94,7 @@ static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm
|
||||
err = mxm_ep_address(ompi_mtl_mxm.ep, ptlid,
|
||||
(struct sockaddr *) &ep_info->ptl_addr[ptlid], &addrlen);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
|
||||
orca_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
|
||||
true, (int)ptlid, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -148,13 +150,13 @@ int ompi_mtl_mxm_module_init(void)
|
||||
}
|
||||
MXM_VERBOSE(1, "MXM support enabled");
|
||||
|
||||
if ((lr = orte_ess.get_node_rank(ORTE_PROC_MY_NAME)) == ORTE_NODE_RANK_INVALID) {
|
||||
if ((lr = orca_node_info_get_rank(ORCA_PROC_MY_NAME)) == ORCA_NODE_RANK_INVALID) {
|
||||
MXM_ERROR("Unable to obtain local node rank");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
for (proc = 0; proc < totps; proc++) {
|
||||
if(OPAL_PROC_ON_LOCAL_NODE(orte_ess.proc_get_locality(&procs[proc]->proc_name))) {
|
||||
if(OPAL_PROC_ON_LOCAL_NODE(orca_process_get_locality(&procs[proc]->proc_name))) {
|
||||
mxlr = max(mxlr, procs[proc]->proc_name.vpid);
|
||||
nlps++;
|
||||
}
|
||||
@ -191,7 +193,7 @@ int ompi_mtl_mxm_module_init(void)
|
||||
/* Open MXM endpoint */
|
||||
err = mxm_ep_create(ompi_mtl_mxm.mxm_context, &ep_opt, &ompi_mtl_mxm.ep);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "unable to create endpoint", true,
|
||||
orca_show_help("help-mtl-mxm.txt", "unable to create endpoint", true,
|
||||
mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -378,7 +380,7 @@ int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
|
||||
|
||||
err = mxm_mq_create(ompi_mtl_mxm.mxm_context, comm->c_contextid, &mq);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "mxm mq create", true, mxm_error_string(err));
|
||||
orca_show_help("help-mtl-mxm.txt", "mxm mq create", true, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -402,7 +404,7 @@ int ompi_mtl_mxm_progress(void)
|
||||
|
||||
err = mxm_progress(ompi_mtl_mxm.mxm_context);
|
||||
if ((MXM_OK != err) && (MXM_ERR_NO_PROGRESS != err) ) {
|
||||
orte_show_help("help-mtl-mxm.txt", "errors during mxm_progress", true, mxm_error_string(err));
|
||||
orca_show_help("help-mtl-mxm.txt", "errors during mxm_progress", true, mxm_error_string(err));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -9,7 +10,7 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
@ -93,7 +94,7 @@ static int ompi_mtl_mxm_component_open(void)
|
||||
if (MXM_ERR_NO_DEVICE == err) {
|
||||
MXM_VERBOSE(1, "No supported device found, disqualifying mxm");
|
||||
} else {
|
||||
orte_show_help("help-mtl-mxm.txt", "mxm init", true,
|
||||
orca_show_help("help-mtl-mxm.txt", "mxm init", true,
|
||||
mxm_error_string(err));
|
||||
}
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -11,7 +12,7 @@
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
@ -76,7 +77,7 @@ int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
/* post-recv */
|
||||
err = mxm_req_recv(mxm_recv_req);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "error posting receive", true,
|
||||
orca_show_help("help-mtl-mxm.txt", "error posting receive", true,
|
||||
mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
@ -1,4 +1,6 @@
|
||||
/* * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -9,7 +11,7 @@
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orca/include/rte_orca.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
@ -74,7 +76,7 @@ int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
|
||||
/* post-send */
|
||||
err = mxm_req_send(&mxm_send_req);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err));
|
||||
orca_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -134,7 +136,7 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
|
||||
/* post-send */
|
||||
err = mxm_req_send(mxm_send_req);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err));
|
||||
orca_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче
Block a user