Until a real fix for #142 is found, this workaround prohibits using
mpi_leave_pinned when multiple OpenIB HCA ports are found. Specifically, if mpi_leave_pinned == 1 and multiple HCA ports are found, the MCA parameter btl_openib_max_btls is set to 1. If the MCA parameter btl_openib_warn_leave_pinned_multi_port is true, emit a warning that this happened (having an MCA parameter to control the warning allows users/sysadmins to turn it off instead of being nagged for every run). This commit was SVN r10424.
Этот коммит содержится в:
родитель
600bf4295a
Коммит
1d27ca5d0a
25
README
25
README
@ -40,7 +40,7 @@ Thanks for your time.
|
||||
===========================================================================
|
||||
|
||||
The following abbreviated list of release notes applies to this code
|
||||
base as of this writing (17 Jun 2006):
|
||||
base as of this writing (19 Jun 2006):
|
||||
|
||||
- Open MPI includes support for a wide variety of supplemental
|
||||
hardware and software packages. When configuring Open MPI, you may
|
||||
@ -213,6 +213,11 @@ base as of this writing (17 Jun 2006):
|
||||
MCA parameter btl_mx_max_send_size can be used to vary the maximum
|
||||
size of subsequent fragments.
|
||||
|
||||
- Using multiple OpenIB HCA ports combined with setting the MCA
|
||||
parameter mpi_leave_pinned to 1 is not supported in this release.
|
||||
If mpi_leave_pinned is set to 1, the openib BTL component will
|
||||
automatically limit itself to use 1 HCA port.
|
||||
|
||||
- The current version of the Open MPI point-to-point engine does not
|
||||
yet support hardware-level MPI message matching. As such, MPI
|
||||
message matching must be performed in software, artificially
|
||||
@ -283,25 +288,37 @@ for a full list); a summary of the more commonly used ones follows:
|
||||
Install Open MPI into the base directory named <directory>. Hence,
|
||||
Open MPI will place its executables in <directory>/bin, its header
|
||||
files in <directory>/include, its libraries in <directory>/lib, etc.
|
||||
More fine-grained control over directory names is available; see the
|
||||
output of "./configure --help" for more details.
|
||||
|
||||
--with-gm=<directory>
|
||||
Specify the directory where the GM libraries and header files are
|
||||
located. This enables GM support in Open MPI.
|
||||
|
||||
--with-gm-libdir=<directory>
|
||||
JMS
|
||||
|
||||
--with-mx=<directory>
|
||||
Specify the directory where the MX libraries and header files are
|
||||
located. This enables MX support in Open MPI.
|
||||
|
||||
--with-mx-libdir=<directory>
|
||||
JMS
|
||||
|
||||
--with-mvapi=<directory>
|
||||
Specify the directory where the mVAPI libraries and header files are
|
||||
located. This enables mVAPI support in Open MPI.
|
||||
|
||||
--with-mvapi-libdir=<directory>
|
||||
JMS
|
||||
|
||||
--with-openib=<directory>
|
||||
Specify the directory where the Open Fabrics (previously known as
|
||||
OpenIB) libraries and header files are located. This enables Open
|
||||
Fabrics support in Open MPI. This option will likely be
|
||||
deprecated in favor of "--with-openfabrics" in a future version of
|
||||
Open MPI.
|
||||
Fabrics support in Open MPI.
|
||||
|
||||
--with-openib-libdir=<directory>
|
||||
JMS
|
||||
|
||||
--with-tm=<directory>
|
||||
Specify the directory where the TM libraries and header files are
|
||||
|
@ -120,6 +120,9 @@ struct mca_btl_openib_component_t {
|
||||
uint32_t eager_rdma_num;
|
||||
uint32_t max_eager_rdma;
|
||||
|
||||
/** Until ticket #142 is fixed; do we print a warning if
|
||||
mpi_leave_pinned is true and multiple HCA ports are found? */
|
||||
int warn_leave_pinned_multi_port;
|
||||
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
|
||||
|
||||
extern mca_btl_openib_component_t mca_btl_openib_component;
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -230,6 +231,10 @@ int mca_btl_openib_component_open(void)
|
||||
mca_btl_openib_component.max_send_size = mca_btl_openib_module.super.btl_max_send_size;
|
||||
mca_btl_openib_component.eager_limit = mca_btl_openib_module.super.btl_eager_limit;
|
||||
|
||||
/* Until ticket #142 is fixed */
|
||||
mca_btl_openib_param_register_int("warn_leave_pinned_multi_port",
|
||||
"If set to a true value, emit a warning when the MCA parameter mpi_leave_pinned is set to a true value and multiple HCA ports are found.",
|
||||
1, &mca_btl_openib_component.warn_leave_pinned_multi_port);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -390,6 +395,20 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* JMS: Workaround until ticket #142 is fixed. If
|
||||
mpi_leave_pinned == true, then override ib_max_btls and set it
|
||||
to 1. If btl_openib_warn_leave_pinned_multi_port is true, emit
|
||||
a warning that we did this. */
|
||||
i = mca_base_param_find("mpi", NULL, "leave_pinned");
|
||||
mca_base_param_lookup_int(i, &j);
|
||||
if (num_devs > 1 && 0 != j) {
|
||||
mca_btl_openib_component.ib_max_btls = 1;
|
||||
if (mca_btl_openib_component.warn_leave_pinned_multi_port) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"btl_openib:leave_pinned_multi_port", true);
|
||||
}
|
||||
}
|
||||
|
||||
#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST == 0
|
||||
/* Allocate space for the ib devices */
|
||||
ib_devs = (struct ibv_device**) malloc(num_devs * sizeof(struct ibv_dev*));
|
||||
|
@ -44,3 +44,15 @@ respect to the retry count:
|
||||
4.096 microseconds * (2^btl_openib_ib_timeout)
|
||||
|
||||
See the InfiniBand spec 1.2 (section 12.7.34) for more details.
|
||||
[btl_openib:leave_pinned_multi_port]
|
||||
# Until ticket #142 is fixed
|
||||
This release of Open MPI does not support setting the
|
||||
"mpi_leave_pinned" parameter to a true value when using multiple HCA
|
||||
ports. This warning is emitted when multiple HCA ports are detected
|
||||
and "mpi_leave_pinned" is set to a true value, and is to inform you
|
||||
that Open MPI is going to automatically disregard all HCA ports beyond
|
||||
the first one (i.e., the "btl_openib_max_btls" MCA parameter
|
||||
has been overridden and set to 1).
|
||||
|
||||
You may silence this warning by setting the
|
||||
"btl_openib_warn_leave_pinned_multi_port" MCA parameter to 0.
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user