support for solaris relaxed ordering
This commit was SVN r20167.
Этот коммит содержится в:
родитель
f911b1a63d
Коммит
213daa58da
@ -154,10 +154,76 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
dat_strerror(rc, (const char**)&major,
|
||||
(const char**)&minor);
|
||||
|
||||
#if defined(__SVR4) && defined(__sun)
|
||||
if (strcmp(major, "DAT_INVALID_PARAMETER") == 0 &&
|
||||
strcmp(minor, "DAT_INVALID_RO_COOKIE") == 0) {
|
||||
/* Some platforms that Solaris runs on implement the PCI
|
||||
* standard for relaxed ordering(RO). Using RDMA with
|
||||
* polling on a memory location as the uDAPL (and openib
|
||||
* BTL by the way) BTL does for short messages with
|
||||
* relaxed ordering could potentially produce silent data
|
||||
* corruption. For this reason we need to detect systems
|
||||
* which support relaxed ordering and turn off RDMA for
|
||||
* short messages. The uDAPL standard does not provide a
|
||||
* way to inform users of this scenario so Sun has
|
||||
* implemented the following. If a platform supports
|
||||
* relaxed ordering when the interface name is passed into
|
||||
* the dat_ia_open() call, the call will return
|
||||
* DAT_INVALID_PARAMETER and DAT_INVALID_RO_COOKIE.
|
||||
* DAT_INVALID_RO_COOKIE is not part of the uDAPL standard
|
||||
* at this time. The only way to open this interface is to
|
||||
* prefix the following cookie "RO_AWARE_" to the ia name
|
||||
* that was retrieved from the dat registry.
|
||||
*
|
||||
* Example: ia_name = "ib0", new expected name will be
|
||||
* "RO_AWARE_ib0".
|
||||
*
|
||||
* Here, since our first ia open attempt failed in the
|
||||
* predetermined way, add the cookie and try to open again.
|
||||
**/
|
||||
DAT_NAME_PTR ro_ia_name;
|
||||
|
||||
/* prefix relaxed order cookie to ia_name */
|
||||
asprintf(&ro_ia_name, "RO_AWARE_%s", ia_name);
|
||||
if (NULL == ro_ia_name) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* because this is not standard inform user in some way */
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_INFORM,
|
||||
("help-mpi-btl-udapl.txt", "relaxed order support",
|
||||
true, ia_name, ro_ia_name));
|
||||
|
||||
/* try and open again */
|
||||
btl->udapl_evd_async = DAT_HANDLE_NULL;
|
||||
rc = dat_ia_open(ro_ia_name, btl->udapl_async_evd_qlen,
|
||||
&btl->udapl_evd_async, &btl->udapl_ia);
|
||||
|
||||
dat_strerror(rc, (const char**)&major,
|
||||
(const char**)&minor);
|
||||
|
||||
if (DAT_SUCCESS == rc) {
|
||||
/* do not allow RDMA for short messages */
|
||||
mca_btl_udapl_component.udapl_use_eager_rdma = 0;
|
||||
free(ro_ia_name);
|
||||
} else {
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt",
|
||||
"dat_ia_open fail RO", true, ro_ia_name,
|
||||
major, minor, ia_name));
|
||||
|
||||
free(ro_ia_name);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"dat_ia_open fail", true, ia_name, major, minor));
|
||||
|
||||
return OMPI_ERROR;
|
||||
#if defined(__SVR4) && defined(__sun)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* create a protection zone */
|
||||
|
@ -74,6 +74,7 @@ struct mca_btl_udapl_component_t {
|
||||
int udapl_free_list_num; /**< initial size of free lists */
|
||||
int udapl_free_list_max; /**< maximum size of free lists */
|
||||
int udapl_free_list_inc; /**< number of elements to alloc when growing */
|
||||
int32_t udapl_use_eager_rdma; /**< turn rdma for small msgs on/off */
|
||||
int32_t udapl_eager_rdma_num; /**< number of rdma buffers allocated
|
||||
for short messages */
|
||||
int32_t udapl_max_eager_rdma_peers; /**< maximum number of peers allowed to
|
||||
|
@ -738,8 +738,9 @@ static int mca_btl_udapl_endpoint_finish_eager(
|
||||
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
||||
|
||||
/* establish eager rdma connection */
|
||||
if (btl->udapl_eager_rdma_endpoint_count <
|
||||
mca_btl_udapl_component.udapl_max_eager_rdma_peers) {
|
||||
if ((1 == mca_btl_udapl_component.udapl_use_eager_rdma) &&
|
||||
(btl->udapl_eager_rdma_endpoint_count <
|
||||
mca_btl_udapl_component.udapl_max_eager_rdma_peers)) {
|
||||
mca_btl_udapl_endpoint_connect_eager_rdma(endpoint);
|
||||
}
|
||||
|
||||
|
@ -170,6 +170,14 @@ int mca_btl_udapl_register_mca_params(void)
|
||||
&mca_btl_udapl_component.udapl_sr_win,
|
||||
REGINT_GE_ONE), tmp_rc, rc);
|
||||
|
||||
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("use_eager_rdma",
|
||||
"Use of RDMA for small messages : "
|
||||
"1 = default, use RDMA for small messages; "
|
||||
"0 = do not use RDMA for small messages. ",
|
||||
1,
|
||||
&mca_btl_udapl_component.udapl_use_eager_rdma,
|
||||
REGINT_GE_ZERO), tmp_rc, rc);
|
||||
|
||||
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("eager_rdma_num",
|
||||
"Number of RDMA buffers to allocate "
|
||||
"for small messages (must be >= 1).",
|
||||
|
@ -122,3 +122,18 @@ WARNING: Host %s, not able to determine netmask for address
|
||||
%s. Will attempt to continue assuming all addresses to
|
||||
peer are reachable.
|
||||
#
|
||||
[relaxed order support]
|
||||
WARNING: While attempting to open interface %s the system reported
|
||||
DAT_INVALID_RO_COOKIE. This indicates the current system supports
|
||||
relaxed ordering. An attempt will be made to open the interface using
|
||||
the following modified interface name %s. Open MPI must not use RDMA
|
||||
for short eager messages in this scenario. Therefore, if opened
|
||||
successfully RDMA will not be used for short eager messages. This will
|
||||
negatively impact short message latency.
|
||||
#
|
||||
[dat_ia_open fail RO]
|
||||
WARNING: Failed to open "%s" [%s:%s].
|
||||
Attempted to call dat_ia_open() on an interface that has been prefixed
|
||||
with "RO_AWARE_" after first trying to open %s and failed with
|
||||
DAT_INVALID_RO_COOKIE.
|
||||
#
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user