btl/openib: delay UCX warning to add_procs()
If UCX is available, then pml/ucx will be used instead of pml/ob1 + btl/openib, so there is no need to warn about btl/openib not supporting InfiniBand.

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
(cherry picked from commit open-mpi/ompi@0a2ce58040)
Этот коммит содержится в:
родитель
efcbc13d2f
Коммит
d7053a306a
@ -19,8 +19,8 @@
|
|||||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||||
* Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved.
|
||||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
* Copyright (c) 2014-2018 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* Copyright (c) 2014 Bull SAS. All rights reserved
|
* Copyright (c) 2014 Bull SAS. All rights reserved
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -1040,6 +1040,14 @@ int mca_btl_openib_add_procs(
|
|||||||
int btl_rank = 0;
|
int btl_rank = 0;
|
||||||
volatile mca_btl_base_endpoint_t* endpoint;
|
volatile mca_btl_base_endpoint_t* endpoint;
|
||||||
|
|
||||||
|
|
||||||
|
if (! openib_btl->allowed) {
|
||||||
|
opal_bitmap_clear_all_bits(reachable);
|
||||||
|
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
|
||||||
|
true, opal_process_info.nodename,
|
||||||
|
ibv_get_device_name(openib_btl->device->ib_dev), openib_btl->port_num);
|
||||||
|
}
|
||||||
|
|
||||||
btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt);
|
btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt);
|
||||||
if( 0 > btl_rank ){
|
if( 0 > btl_rank ){
|
||||||
return OPAL_ERR_NOT_FOUND;
|
return OPAL_ERR_NOT_FOUND;
|
||||||
@ -1639,75 +1647,77 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
|
|||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Release all QPs */
|
if (openib_btl->allowed) {
|
||||||
if (NULL != openib_btl->device->endpoints) {
|
/* Release all QPs */
|
||||||
for (ep_index=0;
|
if (NULL != openib_btl->device->endpoints) {
|
||||||
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
|
for (ep_index=0;
|
||||||
ep_index++) {
|
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
|
||||||
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
|
ep_index++) {
|
||||||
|
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
|
||||||
ep_index);
|
ep_index);
|
||||||
if(!endpoint) {
|
if(!endpoint) {
|
||||||
BTL_VERBOSE(("In finalize, got another null endpoint"));
|
BTL_VERBOSE(("In finalize, got another null endpoint"));
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
if(endpoint->endpoint_btl != openib_btl) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
|
|
||||||
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
|
|
||||||
openib_btl->device->eager_rdma_buffers[i] = NULL;
|
|
||||||
OBJ_RELEASE(endpoint);
|
|
||||||
}
|
}
|
||||||
}
|
if(endpoint->endpoint_btl != openib_btl) {
|
||||||
opal_pointer_array_set_item(openib_btl->device->endpoints,
|
continue;
|
||||||
ep_index, NULL);
|
|
||||||
assert(((opal_object_t*)endpoint)->obj_reference_count == 1);
|
|
||||||
OBJ_RELEASE(endpoint);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Release SRQ resources */
|
|
||||||
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
|
|
||||||
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
|
|
||||||
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
|
|
||||||
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
|
|
||||||
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
|
|
||||||
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
|
|
||||||
if (NULL != openib_btl->qps[qp].u.srq_qp.srq) {
|
|
||||||
opal_mutex_t *lock =
|
|
||||||
&mca_btl_openib_component.srq_manager.lock;
|
|
||||||
|
|
||||||
opal_hash_table_t *srq_addr_table =
|
|
||||||
&mca_btl_openib_component.srq_manager.srq_addr_table;
|
|
||||||
|
|
||||||
opal_mutex_lock(lock);
|
|
||||||
if (OPAL_SUCCESS !=
|
|
||||||
opal_hash_table_remove_value_ptr(srq_addr_table,
|
|
||||||
&openib_btl->qps[qp].u.srq_qp.srq,
|
|
||||||
sizeof(struct ibv_srq *))) {
|
|
||||||
BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp));
|
|
||||||
rc = OPAL_ERROR;
|
|
||||||
}
|
}
|
||||||
opal_mutex_unlock(lock);
|
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
|
||||||
if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) {
|
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
|
||||||
BTL_VERBOSE(("Failed to close SRQ %d", qp));
|
openib_btl->device->eager_rdma_buffers[i] = NULL;
|
||||||
rc = OPAL_ERROR;
|
OBJ_RELEASE(endpoint);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
opal_pointer_array_set_item(openib_btl->device->endpoints,
|
||||||
|
ep_index, NULL);
|
||||||
|
assert(((opal_object_t*)endpoint)->obj_reference_count == 1);
|
||||||
|
OBJ_RELEASE(endpoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
|
|
||||||
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* Finalize the CPC modules on this openib module */
|
/* Release SRQ resources */
|
||||||
for (i = 0; i < openib_btl->num_cpcs; ++i) {
|
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
|
||||||
if (NULL != openib_btl->cpcs[i]->cbm_finalize) {
|
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||||
openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]);
|
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
|
||||||
|
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
|
||||||
|
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
|
||||||
|
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
|
||||||
|
if (NULL != openib_btl->qps[qp].u.srq_qp.srq) {
|
||||||
|
opal_mutex_t *lock =
|
||||||
|
&mca_btl_openib_component.srq_manager.lock;
|
||||||
|
|
||||||
|
opal_hash_table_t *srq_addr_table =
|
||||||
|
&mca_btl_openib_component.srq_manager.srq_addr_table;
|
||||||
|
|
||||||
|
opal_mutex_lock(lock);
|
||||||
|
if (OPAL_SUCCESS !=
|
||||||
|
opal_hash_table_remove_value_ptr(srq_addr_table,
|
||||||
|
&openib_btl->qps[qp].u.srq_qp.srq,
|
||||||
|
sizeof(struct ibv_srq *))) {
|
||||||
|
BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp));
|
||||||
|
rc = OPAL_ERROR;
|
||||||
|
}
|
||||||
|
opal_mutex_unlock(lock);
|
||||||
|
if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) {
|
||||||
|
BTL_VERBOSE(("Failed to close SRQ %d", qp));
|
||||||
|
rc = OPAL_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
|
||||||
|
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
free(openib_btl->cpcs[i]);
|
|
||||||
|
/* Finalize the CPC modules on this openib module */
|
||||||
|
for (i = 0; i < openib_btl->num_cpcs; ++i) {
|
||||||
|
if (NULL != openib_btl->cpcs[i]->cbm_finalize) {
|
||||||
|
openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]);
|
||||||
|
}
|
||||||
|
free(openib_btl->cpcs[i]);
|
||||||
|
}
|
||||||
|
free(openib_btl->cpcs);
|
||||||
}
|
}
|
||||||
free(openib_btl->cpcs);
|
|
||||||
|
|
||||||
/* Release device if there are no more users */
|
/* Release device if there are no more users */
|
||||||
if(!(--openib_btl->device->btls)) {
|
if(!(--openib_btl->device->btls)) {
|
||||||
|
@ -18,8 +18,8 @@
|
|||||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved.
|
||||||
* Copyright (c) 2014 Bull SAS. All rights reserved.
|
* Copyright (c) 2014 Bull SAS. All rights reserved.
|
||||||
* Copyright (c) 2015-2016 Research Organization for Information Science
|
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -164,6 +164,9 @@ struct mca_btl_openib_component_t {
|
|||||||
int ib_num_btls;
|
int ib_num_btls;
|
||||||
/**< number of devices available to the openib component */
|
/**< number of devices available to the openib component */
|
||||||
|
|
||||||
|
int ib_allowed_btls;
|
||||||
|
/**< number of devices allowed to the openib component */
|
||||||
|
|
||||||
struct mca_btl_openib_module_t **openib_btls;
|
struct mca_btl_openib_module_t **openib_btls;
|
||||||
/**< array of available BTLs */
|
/**< array of available BTLs */
|
||||||
|
|
||||||
@ -501,6 +504,8 @@ struct mca_btl_openib_module_t {
|
|||||||
int local_procs; /** number of local procs */
|
int local_procs; /** number of local procs */
|
||||||
|
|
||||||
bool atomic_ops_be; /** atomic result is big endian */
|
bool atomic_ops_be; /** atomic result is big endian */
|
||||||
|
|
||||||
|
bool allowed; /** is this port allowed */
|
||||||
};
|
};
|
||||||
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
|
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
|
||||||
|
|
||||||
|
@ -19,8 +19,8 @@
|
|||||||
* Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
|
||||||
* Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved
|
* Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved
|
||||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2014-2017 Research Organization for Information Science
|
* Copyright (c) 2014-2018 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* Copyright (c) 2014 Bull SAS. All rights reserved.
|
* Copyright (c) 2014 Bull SAS. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -269,7 +269,7 @@ static int btl_openib_modex_send(void)
|
|||||||
/* uint8_t for number of modules in the message */
|
/* uint8_t for number of modules in the message */
|
||||||
1 +
|
1 +
|
||||||
/* For each module: */
|
/* For each module: */
|
||||||
mca_btl_openib_component.ib_num_btls *
|
mca_btl_openib_component.ib_allowed_btls *
|
||||||
(
|
(
|
||||||
/* Common module data */
|
/* Common module data */
|
||||||
modex_message_size +
|
modex_message_size +
|
||||||
@ -278,6 +278,9 @@ static int btl_openib_modex_send(void)
|
|||||||
);
|
);
|
||||||
/* For each module, add in the size of the per-CPC data */
|
/* For each module, add in the size of the per-CPC data */
|
||||||
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
|
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
|
||||||
|
if (! mca_btl_openib_component.openib_btls[i]->allowed) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
for (j = 0;
|
for (j = 0;
|
||||||
j < mca_btl_openib_component.openib_btls[i]->num_cpcs;
|
j < mca_btl_openib_component.openib_btls[i]->num_cpcs;
|
||||||
++j) {
|
++j) {
|
||||||
@ -300,12 +303,15 @@ static int btl_openib_modex_send(void)
|
|||||||
|
|
||||||
/* Pack the number of modules */
|
/* Pack the number of modules */
|
||||||
offset = message;
|
offset = message;
|
||||||
pack8(&offset, mca_btl_openib_component.ib_num_btls);
|
pack8(&offset, mca_btl_openib_component.ib_allowed_btls);
|
||||||
opal_output(-1, "modex sending %d btls (packed: %d, offset now at %d)", mca_btl_openib_component.ib_num_btls, *((uint8_t*) message), (int) (offset - message));
|
opal_output(-1, "modex sending %d btls (packed: %d, offset now at %d)", mca_btl_openib_component.ib_allowed_btls, *((uint8_t*) message), (int) (offset - message));
|
||||||
|
|
||||||
/* Pack each of the modules */
|
/* Pack each of the modules */
|
||||||
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
|
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
|
||||||
|
|
||||||
|
if (! mca_btl_openib_component.openib_btls[i]->allowed) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
/* Pack the modex common message struct. */
|
/* Pack the modex common message struct. */
|
||||||
size = modex_message_size;
|
size = modex_message_size;
|
||||||
|
|
||||||
@ -628,22 +634,35 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
|||||||
* policy. For ancient OFED, only allow if user has set
|
* policy. For ancient OFED, only allow if user has set
|
||||||
* the MCA parameter.
|
* the MCA parameter.
|
||||||
*/
|
*/
|
||||||
|
if (! mca_btl_openib_component.allow_ib
|
||||||
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
|
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
|
||||||
if ((IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer) &&
|
&& IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer
|
||||||
(false == mca_btl_openib_component.allow_ib)) {
|
|
||||||
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
|
|
||||||
true, opal_process_info.nodename,
|
|
||||||
ibv_get_device_name(device->ib_dev), port_num);
|
|
||||||
return OPAL_ERR_NOT_FOUND;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if (false == mca_btl_openib_component.allow_ib) {
|
|
||||||
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
|
|
||||||
true, opal_process_info.nodename,
|
|
||||||
ibv_get_device_name(device->ib_dev), port_num);
|
|
||||||
return OPAL_ERR_NOT_FOUND;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
) {
|
||||||
|
openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t));
|
||||||
|
if(NULL == openib_btl) {
|
||||||
|
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
|
||||||
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
memcpy(openib_btl, &mca_btl_openib_module,
|
||||||
|
sizeof(mca_btl_openib_module));
|
||||||
|
ib_selected = OBJ_NEW(mca_btl_base_selected_module_t);
|
||||||
|
ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl;
|
||||||
|
openib_btl->device = device;
|
||||||
|
openib_btl->port_num = (uint8_t) port_num;
|
||||||
|
openib_btl->allowed = false;
|
||||||
|
OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t);
|
||||||
|
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
|
||||||
|
opal_pointer_array_add(device->device_btls, (void*) openib_btl);
|
||||||
|
++device->btls;
|
||||||
|
++mca_btl_openib_component.ib_num_btls;
|
||||||
|
if (-1 != mca_btl_openib_component.ib_max_btls &&
|
||||||
|
mca_btl_openib_component.ib_num_btls >=
|
||||||
|
mca_btl_openib_component.ib_max_btls) {
|
||||||
|
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
|
||||||
|
}
|
||||||
|
return OPAL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Ensure that the requested GID index (via the
|
/* Ensure that the requested GID index (via the
|
||||||
@ -880,10 +899,13 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
openib_btl->allowed = true;
|
||||||
|
|
||||||
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
|
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
|
||||||
opal_pointer_array_add(device->device_btls, (void*) openib_btl);
|
opal_pointer_array_add(device->device_btls, (void*) openib_btl);
|
||||||
++device->btls;
|
++device->btls;
|
||||||
++mca_btl_openib_component.ib_num_btls;
|
++mca_btl_openib_component.ib_num_btls;
|
||||||
|
++mca_btl_openib_component.ib_allowed_btls;
|
||||||
if (-1 != mca_btl_openib_component.ib_max_btls &&
|
if (-1 != mca_btl_openib_component.ib_max_btls &&
|
||||||
mca_btl_openib_component.ib_num_btls >=
|
mca_btl_openib_component.ib_num_btls >=
|
||||||
mca_btl_openib_component.ib_max_btls) {
|
mca_btl_openib_component.ib_max_btls) {
|
||||||
@ -2912,36 +2934,38 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
goto no_btls;
|
goto no_btls;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now that we know we have devices and ports that we want to use,
|
if (0 < mca_btl_openib_component.ib_allowed_btls) {
|
||||||
init CPC components */
|
/* Now that we know we have devices and ports that we want to use,
|
||||||
if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) {
|
init CPC components */
|
||||||
goto no_btls;
|
if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) {
|
||||||
}
|
|
||||||
|
|
||||||
/* Setup the BSRQ QP's based on the final value of
|
|
||||||
mca_btl_openib_component.receive_queues. */
|
|
||||||
if (OPAL_SUCCESS != setup_qps()) {
|
|
||||||
goto no_btls;
|
|
||||||
}
|
|
||||||
if (mca_btl_openib_component.num_srq_qps > 0 ||
|
|
||||||
mca_btl_openib_component.num_xrc_qps > 0) {
|
|
||||||
opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table;
|
|
||||||
if(OPAL_SUCCESS != opal_hash_table_init(
|
|
||||||
srq_addr_table, (mca_btl_openib_component.num_srq_qps +
|
|
||||||
mca_btl_openib_component.num_xrc_qps) *
|
|
||||||
mca_btl_openib_component.ib_num_btls)) {
|
|
||||||
BTL_ERROR(("SRQ internal error. Failed to allocate SRQ addr hash table"));
|
|
||||||
goto no_btls;
|
goto no_btls;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* For XRC:
|
/* Setup the BSRQ QP's based on the final value of
|
||||||
* from this point we know if MCA_BTL_XRC_ENABLED it true or false */
|
mca_btl_openib_component.receive_queues. */
|
||||||
|
if (OPAL_SUCCESS != setup_qps()) {
|
||||||
|
goto no_btls;
|
||||||
|
}
|
||||||
|
if (mca_btl_openib_component.num_srq_qps > 0 ||
|
||||||
|
mca_btl_openib_component.num_xrc_qps > 0) {
|
||||||
|
opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table;
|
||||||
|
if(OPAL_SUCCESS != opal_hash_table_init(
|
||||||
|
srq_addr_table, (mca_btl_openib_component.num_srq_qps +
|
||||||
|
mca_btl_openib_component.num_xrc_qps) *
|
||||||
|
mca_btl_openib_component.ib_num_btls)) {
|
||||||
|
BTL_ERROR(("SRQ internal error. Failed to allocate SRQ addr hash table"));
|
||||||
|
goto no_btls;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Init XRC IB Addr hash table */
|
/* For XRC:
|
||||||
if (MCA_BTL_XRC_ENABLED) {
|
* from this point we know if MCA_BTL_XRC_ENABLED it true or false */
|
||||||
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_addr_table,
|
|
||||||
opal_hash_table_t);
|
/* Init XRC IB Addr hash table */
|
||||||
|
if (MCA_BTL_XRC_ENABLED) {
|
||||||
|
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_addr_table,
|
||||||
|
opal_hash_table_t);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Allocate space for btl modules */
|
/* Allocate space for btl modules */
|
||||||
@ -2967,31 +2991,34 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
ib_selected = (mca_btl_base_selected_module_t*)item;
|
ib_selected = (mca_btl_base_selected_module_t*)item;
|
||||||
openib_btl = (mca_btl_openib_module_t*)ib_selected->btl_module;
|
openib_btl = (mca_btl_openib_module_t*)ib_selected->btl_module;
|
||||||
|
|
||||||
/* Search for a CPC that can handle this port */
|
if (openib_btl->allowed) {
|
||||||
ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl);
|
/* Search for a CPC that can handle this port */
|
||||||
/* If we get NOT_SUPPORTED, then no CPC was found for this
|
ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl);
|
||||||
port. But that's not a fatal error -- just keep going;
|
/* If we get NOT_SUPPORTED, then no CPC was found for this
|
||||||
let's see if we find any usable openib modules or not. */
|
port. But that's not a fatal error -- just keep going;
|
||||||
if (OPAL_ERR_NOT_SUPPORTED == ret) {
|
let's see if we find any usable openib modules or not. */
|
||||||
continue;
|
if (OPAL_ERR_NOT_SUPPORTED == ret) {
|
||||||
} else if (OPAL_SUCCESS != ret) {
|
continue;
|
||||||
/* All others *are* fatal. Note that we already did a
|
} else if (OPAL_SUCCESS != ret) {
|
||||||
show_help in the lower layer */
|
/* All others *are* fatal. Note that we already did a
|
||||||
goto no_btls;
|
show_help in the lower layer */
|
||||||
}
|
goto no_btls;
|
||||||
|
}
|
||||||
|
|
||||||
if (mca_btl_openib_component.max_hw_msg_size > 0 &&
|
if (mca_btl_openib_component.max_hw_msg_size > 0 &&
|
||||||
(uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) {
|
(uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) {
|
||||||
BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")",
|
BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")",
|
||||||
mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz));
|
mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (finish_btl_init(openib_btl) != OPAL_SUCCESS) {
|
||||||
|
goto no_btls;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mca_btl_openib_component.openib_btls[i] = openib_btl;
|
mca_btl_openib_component.openib_btls[i] = openib_btl;
|
||||||
OBJ_RELEASE(ib_selected);
|
OBJ_RELEASE(ib_selected);
|
||||||
btls[i] = &openib_btl->super;
|
btls[i] = &openib_btl->super;
|
||||||
if (finish_btl_init(openib_btl) != OPAL_SUCCESS) {
|
|
||||||
goto no_btls;
|
|
||||||
}
|
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
/* If we got nothing, then error out */
|
/* If we got nothing, then error out */
|
||||||
@ -3039,6 +3066,7 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
there are no openib BTL's in this process and return NULL. */
|
there are no openib BTL's in this process and return NULL. */
|
||||||
|
|
||||||
mca_btl_openib_component.ib_num_btls = 0;
|
mca_btl_openib_component.ib_num_btls = 0;
|
||||||
|
mca_btl_openib_component.ib_allowed_btls = 0;
|
||||||
btl_openib_modex_send();
|
btl_openib_modex_send();
|
||||||
if (NULL != btls) {
|
if (NULL != btls) {
|
||||||
free(btls);
|
free(btls);
|
||||||
|
@ -13,8 +13,8 @@
|
|||||||
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2015-2016 Research Organization for Information Science
|
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
|
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
|
||||||
* Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
@ -277,6 +277,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc)
|
|||||||
|
|
||||||
if (0 == ib_proc->proc_port_count) {
|
if (0 == ib_proc->proc_port_count) {
|
||||||
ib_proc->proc_endpoints = NULL;
|
ib_proc->proc_endpoints = NULL;
|
||||||
|
goto no_err_exit;
|
||||||
} else {
|
} else {
|
||||||
ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**)
|
ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**)
|
||||||
malloc(ib_proc->proc_port_count *
|
malloc(ib_proc->proc_port_count *
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user