The patch includes the following:
* Add a new MCA parameter, btl_openib_max_hw_msg_size: the maximum size (in bytes) of a single fragment of a long message when using the RDMA protocols (must be > 0 and <= hw capabilities).
* If btl_openib_max_hw_msg_size is larger than the maximum hw limitation, print an error message.
* Change the default openib flags to include only PUT and not GET.
* Print an error message if the user manually chooses the GET flag in the openib btl.
* In prepare_dst: limit the message size to the minimum of both endpoints' hw limitation and the user limitation (if requested).

This commit was SVN r24191.
Этот коммит содержится в:
родитель
9251785161
Коммит
bfe611d3bd
@ -1195,18 +1195,38 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
||||
uint32_t flags)
|
||||
{
|
||||
mca_btl_openib_module_t *openib_btl;
|
||||
mca_btl_openib_component_t *openib_component;
|
||||
mca_btl_openib_com_frag_t *frag;
|
||||
mca_btl_openib_reg_t *openib_reg;
|
||||
size_t max_msg_sz;
|
||||
int rc;
|
||||
void *buffer;
|
||||
|
||||
openib_btl = (mca_btl_openib_module_t*)btl;
|
||||
openib_component = (mca_btl_openib_component_t*)btl->btl_component;
|
||||
|
||||
frag = alloc_recv_user_frag();
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* max_msg_sz is the maximum message size of the HCA (hw limitation)
|
||||
set the minimum between local max_msg_sz and the remote*/
|
||||
max_msg_sz = MIN(openib_btl->ib_port_attr.max_msg_sz,
|
||||
endpoint->endpoint_btl->ib_port_attr.max_msg_sz);
|
||||
|
||||
/* check if user has explicitly limited the max message size */
|
||||
if (openib_component->max_hw_msg_size > 0 &&
|
||||
max_msg_sz > openib_component->max_hw_msg_size) {
|
||||
max_msg_sz = openib_component->max_hw_msg_size;
|
||||
}
|
||||
|
||||
/* limit the message so to max_msg_sz*/
|
||||
if (*size > max_msg_sz) {
|
||||
*size = max_msg_sz;
|
||||
BTL_VERBOSE(("message size limited to %d", *size));
|
||||
}
|
||||
|
||||
opal_convertor_get_current_pointer(convertor, &buffer);
|
||||
|
||||
if(NULL == registration){
|
||||
|
@ -198,6 +198,7 @@ struct mca_btl_openib_component_t {
|
||||
|
||||
size_t eager_limit; /**< Eager send limit of first fragment, in Bytes */
|
||||
size_t max_send_size; /**< Maximum send size, in Bytes */
|
||||
int32_t max_hw_msg_size; /**< Maximum message size for RDMA protocols in Bytes */
|
||||
uint32_t reg_mru_len; /**< Length of the registration cache most recently used list */
|
||||
uint32_t use_srq; /**< Use the Shared Receive Queue (SRQ mode) */
|
||||
|
||||
|
@ -2480,6 +2480,17 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
}
|
||||
}
|
||||
|
||||
index = mca_base_param_find("btl", "openib", "flags");
|
||||
if (index >= 0) {
|
||||
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value)) {
|
||||
if (value & MCA_BTL_FLAGS_GET) {
|
||||
/* Until GET flow is fixed - we do not support GET
|
||||
in openib btl. */
|
||||
BTL_ERROR(("openib btl does not support GET flag"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&mca_btl_openib_component.send_free_coalesced, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_openib_component.send_user_free, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_openib_component.recv_user_free, ompi_free_list_t);
|
||||
@ -2750,6 +2761,12 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
goto no_btls;
|
||||
}
|
||||
|
||||
if (mca_btl_openib_component.max_hw_msg_size > 0 &&
|
||||
mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) {
|
||||
BTL_ERROR(("max_hw_msg_size (%d) is larger than hw max message size (%d)",
|
||||
mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz));
|
||||
}
|
||||
|
||||
mca_btl_openib_component.openib_btls[i] = openib_btl;
|
||||
OBJ_RELEASE(ib_selected);
|
||||
btls[i] = &openib_btl->super;
|
||||
|
@ -502,6 +502,11 @@ int btl_openib_register_mca_params(void)
|
||||
10, &ival, REGINT_GE_ONE));
|
||||
mca_btl_openib_component.cq_poll_progress = (uint32_t)ival;
|
||||
|
||||
CHECK(reg_int("max_hw_msg_size", NULL,
|
||||
"Maximum size (in bytes) of a single fragment of a long message when using the RDMA protocols (must be > 0 and <= hw capabilities).",
|
||||
-1, &ival, REGINT_NEG_ONE_OK|REGINT_GE_ZERO));
|
||||
mca_btl_openib_component.max_hw_msg_size = (int32_t)ival;
|
||||
|
||||
/* Info only */
|
||||
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
|
||||
"have_fork_support",
|
||||
@ -519,7 +524,7 @@ int btl_openib_register_mca_params(void)
|
||||
mca_btl_openib_module.super.btl_rdma_pipeline_send_length = 1024 * 1024;
|
||||
mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024;
|
||||
mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024;
|
||||
mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA |
|
||||
mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_PUT |
|
||||
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
|
||||
#if OMPI_OPENIB_FAILOVER_ENABLED
|
||||
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_FAILOVER_SUPPORT;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user