1
1

btl/openib: delay UCX warning to add_procs()

If UCX is available, then pml/ucx will be used instead of
pml/ob1 + btl/openib, so there is no need to warn up front that
btl/openib does not support InfiniBand. The warning is therefore
delayed until add_procs() is called on the btl/openib module.

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>

(cherry picked from commit open-mpi/ompi@0a2ce58040)
Этот коммит содержится в:
Gilles Gouaillardet 2018-11-29 11:38:07 +09:00
родитель efcbc13d2f
Коммит d7053a306a
4 изменённых файла: 174 добавления и 130 удалений

Просмотреть файл

@ -19,7 +19,7 @@
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved
* $COPYRIGHT$
@ -1040,6 +1040,14 @@ int mca_btl_openib_add_procs(
int btl_rank = 0;
volatile mca_btl_base_endpoint_t* endpoint;
if (! openib_btl->allowed) {
opal_bitmap_clear_all_bits(reachable);
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
true, opal_process_info.nodename,
ibv_get_device_name(openib_btl->device->ib_dev), openib_btl->port_num);
}
btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt);
if( 0 > btl_rank ){
return OPAL_ERR_NOT_FOUND;
@ -1639,6 +1647,7 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
return OPAL_SUCCESS;
}
if (openib_btl->allowed) {
/* Release all QPs */
if (NULL != openib_btl->device->endpoints) {
for (ep_index=0;
@ -1708,6 +1717,7 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
free(openib_btl->cpcs[i]);
}
free(openib_btl->cpcs);
}
/* Release device if there are no more users */
if(!(--openib_btl->device->btls)) {

Просмотреть файл

@ -18,7 +18,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
@ -164,6 +164,9 @@ struct mca_btl_openib_component_t {
int ib_num_btls;
/**< number of devices available to the openib component */
int ib_allowed_btls;
/**< number of devices allowed to the openib component */
struct mca_btl_openib_module_t **openib_btls;
/**< array of available BTLs */
@ -501,6 +504,8 @@ struct mca_btl_openib_module_t {
int local_procs; /** number of local procs */
bool atomic_ops_be; /** atomic result is big endian */
bool allowed; /** is this port allowed */
};
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;

Просмотреть файл

@ -19,7 +19,7 @@
* Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* $COPYRIGHT$
@ -269,7 +269,7 @@ static int btl_openib_modex_send(void)
/* uint8_t for number of modules in the message */
1 +
/* For each module: */
mca_btl_openib_component.ib_num_btls *
mca_btl_openib_component.ib_allowed_btls *
(
/* Common module data */
modex_message_size +
@ -278,6 +278,9 @@ static int btl_openib_modex_send(void)
);
/* For each module, add in the size of the per-CPC data */
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
if (! mca_btl_openib_component.openib_btls[i]->allowed) {
continue;
}
for (j = 0;
j < mca_btl_openib_component.openib_btls[i]->num_cpcs;
++j) {
@ -300,12 +303,15 @@ static int btl_openib_modex_send(void)
/* Pack the number of modules */
offset = message;
pack8(&offset, mca_btl_openib_component.ib_num_btls);
opal_output(-1, "modex sending %d btls (packed: %d, offset now at %d)", mca_btl_openib_component.ib_num_btls, *((uint8_t*) message), (int) (offset - message));
pack8(&offset, mca_btl_openib_component.ib_allowed_btls);
opal_output(-1, "modex sending %d btls (packed: %d, offset now at %d)", mca_btl_openib_component.ib_allowed_btls, *((uint8_t*) message), (int) (offset - message));
/* Pack each of the modules */
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
if (! mca_btl_openib_component.openib_btls[i]->allowed) {
continue;
}
/* Pack the modex common message struct. */
size = modex_message_size;
@ -628,22 +634,35 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
* policy. For ancient OFED, only allow if user has set
* the MCA parameter.
*/
if (! mca_btl_openib_component.allow_ib
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
if ((IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer) &&
(false == mca_btl_openib_component.allow_ib)) {
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
true, opal_process_info.nodename,
ibv_get_device_name(device->ib_dev), port_num);
return OPAL_ERR_NOT_FOUND;
}
#else
if (false == mca_btl_openib_component.allow_ib) {
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
true, opal_process_info.nodename,
ibv_get_device_name(device->ib_dev), port_num);
return OPAL_ERR_NOT_FOUND;
}
&& IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer
#endif
) {
openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t));
if(NULL == openib_btl) {
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
return OPAL_ERR_OUT_OF_RESOURCE;
}
memcpy(openib_btl, &mca_btl_openib_module,
sizeof(mca_btl_openib_module));
ib_selected = OBJ_NEW(mca_btl_base_selected_module_t);
ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl;
openib_btl->device = device;
openib_btl->port_num = (uint8_t) port_num;
openib_btl->allowed = false;
OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t);
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
opal_pointer_array_add(device->device_btls, (void*) openib_btl);
++device->btls;
++mca_btl_openib_component.ib_num_btls;
if (-1 != mca_btl_openib_component.ib_max_btls &&
mca_btl_openib_component.ib_num_btls >=
mca_btl_openib_component.ib_max_btls) {
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
}
return OPAL_SUCCESS;
}
/* Ensure that the requested GID index (via the
@ -880,10 +899,13 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
}
}
openib_btl->allowed = true;
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
opal_pointer_array_add(device->device_btls, (void*) openib_btl);
++device->btls;
++mca_btl_openib_component.ib_num_btls;
++mca_btl_openib_component.ib_allowed_btls;
if (-1 != mca_btl_openib_component.ib_max_btls &&
mca_btl_openib_component.ib_num_btls >=
mca_btl_openib_component.ib_max_btls) {
@ -2912,6 +2934,7 @@ btl_openib_component_init(int *num_btl_modules,
goto no_btls;
}
if (0 < mca_btl_openib_component.ib_allowed_btls) {
/* Now that we know we have devices and ports that we want to use,
init CPC components */
if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) {
@ -2943,6 +2966,7 @@ btl_openib_component_init(int *num_btl_modules,
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_addr_table,
opal_hash_table_t);
}
}
/* Allocate space for btl modules */
mca_btl_openib_component.openib_btls =
@ -2967,6 +2991,7 @@ btl_openib_component_init(int *num_btl_modules,
ib_selected = (mca_btl_base_selected_module_t*)item;
openib_btl = (mca_btl_openib_module_t*)ib_selected->btl_module;
if (openib_btl->allowed) {
/* Search for a CPC that can handle this port */
ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl);
/* If we get NOT_SUPPORTED, then no CPC was found for this
@ -2986,12 +3011,14 @@ btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz));
}
mca_btl_openib_component.openib_btls[i] = openib_btl;
OBJ_RELEASE(ib_selected);
btls[i] = &openib_btl->super;
if (finish_btl_init(openib_btl) != OPAL_SUCCESS) {
goto no_btls;
}
}
mca_btl_openib_component.openib_btls[i] = openib_btl;
OBJ_RELEASE(ib_selected);
btls[i] = &openib_btl->super;
++i;
}
/* If we got nothing, then error out */
@ -3039,6 +3066,7 @@ btl_openib_component_init(int *num_btl_modules,
there are no openib BTL's in this process and return NULL. */
mca_btl_openib_component.ib_num_btls = 0;
mca_btl_openib_component.ib_allowed_btls = 0;
btl_openib_modex_send();
if (NULL != btls) {
free(btls);

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights
@ -277,6 +277,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc)
if (0 == ib_proc->proc_port_count) {
ib_proc->proc_endpoints = NULL;
goto no_err_exit;
} else {
ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**)
malloc(ib_proc->proc_port_count *