1
1
Commit d7053a3 broke things for the case when Open MPI 4.0.x is built
without UCX support.  Problem was it was trying to partially initialize
the btl to try and delay printing of a help message till wireup.  Well
this sort of doesn't work in all cases.  Rather than keep piling on
changes to support a help message for a BTL that we are deprecating, take
a keep it simple stupid approach.

So, revert most of d7053a3 and instead put the help message back in the
original location, during scan of ports of the available HCAs to check
for whether or not link layer for that port is configured for ethernet or infiniband.
If Open MPI was built with UCX support, don't emit the help message, if
UCX was not linked in, emit the help message.

Verified on a system with connectX5 HCAs configured with two ports configured
for ethernet and two for infiniband.

relates to #6785

Signed-off-by: Howard Pritchard <howardp@lanl.gov>
Этот коммит содержится в:
Howard Pritchard 2019-07-12 07:35:56 -06:00
родитель 2df46aca54
Коммит 71f240f078
4 изменённых файлов: 105 добавлений и 133 удалений

Просмотреть файл

@ -135,9 +135,11 @@ AC_DEFUN([OMPI_CHECK_UCX],[
[$1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_ucx_CPPFLAGS"
$1_LDFLAGS="[$]$1_LDFLAGS $ompi_check_ucx_LDFLAGS"
$1_LIBS="[$]$1_LIBS $ompi_check_ucx_LIBS"
AC_DEFINE([HAVE_UCX], [1], [have ucx])
$2],
[AS_IF([test ! -z "$with_ucx" && test "$with_ucx" != "no"],
[AC_MSG_ERROR([UCX support requested but not found. Aborting])])
AC_DEFINE([HAVE_UCX], [0], [have ucx])
$3])
OPAL_VAR_SCOPE_POP

Просмотреть файл

@ -22,6 +22,7 @@
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved
* Copyrigth (c) 2019 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -1040,15 +1041,6 @@ int mca_btl_openib_add_procs(
int btl_rank = 0;
volatile mca_btl_base_endpoint_t* endpoint;
if (! openib_btl->allowed) {
opal_bitmap_clear_all_bits(reachable);
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
true, opal_process_info.nodename,
openib_btl->device_name, openib_btl->port_num);
return OPAL_SUCCESS;
}
btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt);
if( 0 > btl_rank ){
return OPAL_ERR_NOT_FOUND;
@ -1648,83 +1640,82 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
return OPAL_SUCCESS;
}
if (openib_btl->allowed) {
/* Release all QPs */
if (NULL != openib_btl->device->endpoints) {
for (ep_index=0;
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
ep_index++) {
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
/* Release all QPs */
if (NULL != openib_btl->device->endpoints) {
for (ep_index=0;
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
ep_index++) {
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
ep_index);
if(!endpoint) {
BTL_VERBOSE(("In finalize, got another null endpoint"));
continue;
}
if(endpoint->endpoint_btl != openib_btl) {
continue;
}
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
openib_btl->device->eager_rdma_buffers[i] = NULL;
OBJ_RELEASE(endpoint);
}
}
opal_pointer_array_set_item(openib_btl->device->endpoints,
ep_index, NULL);
assert(((opal_object_t*)endpoint)->obj_reference_count == 1);
OBJ_RELEASE(endpoint);
if(!endpoint) {
BTL_VERBOSE(("In finalize, got another null endpoint"));
continue;
}
}
/* Release SRQ resources */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
if (NULL != openib_btl->qps[qp].u.srq_qp.srq) {
opal_mutex_t *lock =
&mca_btl_openib_component.srq_manager.lock;
opal_hash_table_t *srq_addr_table =
&mca_btl_openib_component.srq_manager.srq_addr_table;
opal_mutex_lock(lock);
if (OPAL_SUCCESS !=
opal_hash_table_remove_value_ptr(srq_addr_table,
&openib_btl->qps[qp].u.srq_qp.srq,
sizeof(struct ibv_srq *))) {
BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp));
rc = OPAL_ERROR;
}
opal_mutex_unlock(lock);
if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) {
BTL_VERBOSE(("Failed to close SRQ %d", qp));
rc = OPAL_ERROR;
}
if(endpoint->endpoint_btl != openib_btl) {
continue;
}
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
openib_btl->device->eager_rdma_buffers[i] = NULL;
OBJ_RELEASE(endpoint);
}
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
}
opal_pointer_array_set_item(openib_btl->device->endpoints,
ep_index, NULL);
assert(((opal_object_t*)endpoint)->obj_reference_count == 1);
OBJ_RELEASE(endpoint);
}
}
/* Finalize the CPC modules on this openib module */
for (i = 0; i < openib_btl->num_cpcs; ++i) {
if (NULL != openib_btl->cpcs[i]->cbm_finalize) {
openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]);
/* Release SRQ resources */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
if (NULL != openib_btl->qps[qp].u.srq_qp.srq) {
opal_mutex_t *lock =
&mca_btl_openib_component.srq_manager.lock;
opal_hash_table_t *srq_addr_table =
&mca_btl_openib_component.srq_manager.srq_addr_table;
opal_mutex_lock(lock);
if (OPAL_SUCCESS !=
opal_hash_table_remove_value_ptr(srq_addr_table,
&openib_btl->qps[qp].u.srq_qp.srq,
sizeof(struct ibv_srq *))) {
BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp));
rc = OPAL_ERROR;
}
opal_mutex_unlock(lock);
if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) {
BTL_VERBOSE(("Failed to close SRQ %d", qp));
rc = OPAL_ERROR;
}
}
free(openib_btl->cpcs[i]);
}
free(openib_btl->cpcs);
/* Release device if there are no more users */
if(!(--openib_btl->device->allowed_btls)) {
OBJ_RELEASE(openib_btl->device);
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
}
}
/* Finalize the CPC modules on this openib module */
for (i = 0; i < openib_btl->num_cpcs; ++i) {
if (NULL != openib_btl->cpcs[i]->cbm_finalize) {
openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]);
}
free(openib_btl->cpcs[i]);
}
free(openib_btl->cpcs);
/* Release device if there are no more users */
if(!(--openib_btl->device->allowed_btls)) {
OBJ_RELEASE(openib_btl->device);
}
if (NULL != openib_btl->qps) {
free(openib_btl->qps);
}

Просмотреть файл

@ -20,6 +20,8 @@
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyrigth (c) 2019 Triad National Security, LLC. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -506,8 +508,6 @@ struct mca_btl_openib_module_t {
int local_procs; /** number of local procs */
bool atomic_ops_be; /** atomic result is big endian */
bool allowed; /** is this port allowed */
};
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;

Просмотреть файл

@ -22,6 +22,7 @@
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyrigth (c) 2019 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -278,9 +279,6 @@ static int btl_openib_modex_send(void)
);
/* For each module, add in the size of the per-CPC data */
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
if (! mca_btl_openib_component.openib_btls[i]->allowed) {
continue;
}
for (j = 0;
j < mca_btl_openib_component.openib_btls[i]->num_cpcs;
++j) {
@ -309,9 +307,6 @@ static int btl_openib_modex_send(void)
/* Pack each of the modules */
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
if (! mca_btl_openib_component.openib_btls[i]->allowed) {
continue;
}
/* Pack the modex common message struct. */
size = modex_message_size;
@ -633,38 +628,26 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
* unless the user specifically requested to override this
* policy. For ancient OFED, only allow if user has set
* the MCA parameter.
*
* We emit a help message if Open MPI was configured without
* UCX support if the port is configured to use infiniband for link
* layer. If UCX support is available, don't emit help message
* since UCX PML has higher priority than OB1 and this BTL will
* not be used.
*/
if (! mca_btl_openib_component.allow_ib
if (false == mca_btl_openib_component.allow_ib
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
&& IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer
#endif
) {
openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t));
if(NULL == openib_btl) {
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
return OPAL_ERR_OUT_OF_RESOURCE;
}
memcpy(openib_btl, &mca_btl_openib_module,
sizeof(mca_btl_openib_module));
ib_selected = OBJ_NEW(mca_btl_base_selected_module_t);
ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl;
openib_btl->port_num = (uint8_t) port_num;
openib_btl->allowed = false;
openib_btl->device = NULL;
openib_btl->device_name = strdup(ibv_get_device_name(device->ib_dev));
OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t);
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
opal_pointer_array_add(device->device_btls, (void*) openib_btl);
++device->btls;
++mca_btl_openib_component.ib_num_btls;
if (-1 != mca_btl_openib_component.ib_max_btls &&
mca_btl_openib_component.ib_num_btls >=
mca_btl_openib_component.ib_max_btls) {
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
}
return OPAL_SUCCESS;
}
#if !HAVE_UCX
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
true, opal_process_info.nodename,
ibv_get_device_name(device->ib_dev),
port_num);
#endif
return OPAL_ERR_NOT_FOUND;
}
/* Ensure that the requested GID index (via the
btl_openib_gid_index MCA param) is within the GID table
@ -901,8 +884,6 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
}
}
openib_btl->allowed = true;
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
opal_pointer_array_add(device->device_btls, (void*) openib_btl);
++device->btls;
@ -2999,29 +2980,27 @@ btl_openib_component_init(int *num_btl_modules,
ib_selected = (mca_btl_base_selected_module_t*)item;
openib_btl = (mca_btl_openib_module_t*)ib_selected->btl_module;
if (openib_btl->allowed) {
/* Search for a CPC that can handle this port */
ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl);
/* If we get NOT_SUPPORTED, then no CPC was found for this
port. But that's not a fatal error -- just keep going;
let's see if we find any usable openib modules or not. */
if (OPAL_ERR_NOT_SUPPORTED == ret) {
continue;
} else if (OPAL_SUCCESS != ret) {
/* All others *are* fatal. Note that we already did a
show_help in the lower layer */
goto no_btls;
}
/* Search for a CPC that can handle this port */
ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl);
/* If we get NOT_SUPPORTED, then no CPC was found for this
port. But that's not a fatal error -- just keep going;
let's see if we find any usable openib modules or not. */
if (OPAL_ERR_NOT_SUPPORTED == ret) {
continue;
} else if (OPAL_SUCCESS != ret) {
/* All others *are* fatal. Note that we already did a
show_help in the lower layer */
goto no_btls;
}
if (mca_btl_openib_component.max_hw_msg_size > 0 &&
(uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) {
BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")",
mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz));
}
if (mca_btl_openib_component.max_hw_msg_size > 0 &&
(uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) {
BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")",
mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz));
}
if (finish_btl_init(openib_btl) != OPAL_SUCCESS) {
goto no_btls;
}
if (finish_btl_init(openib_btl) != OPAL_SUCCESS) {
goto no_btls;
}
mca_btl_openib_component.openib_btls[i] = openib_btl;