Merge pull request #1895 from PDeveze/Patchs-on-btl-portals4
btl/portals4: Take into account the limitation of portals4 (max_msg_size) …
Этот коммит содержится в:
Коммит
b90da992c8
@ -99,7 +99,6 @@ btl_portals4_init_interface(void)
|
||||
|
||||
/* Create recv_idx portal table entry */
|
||||
ret = PtlPTAlloc(portals4_btl->portals_ni_h,
|
||||
PTL_PT_ONLY_USE_ONCE |
|
||||
PTL_PT_ONLY_TRUNCATE,
|
||||
portals4_btl->recv_eq_h,
|
||||
REQ_BTL_TABLE_ID,
|
||||
@ -429,7 +428,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
|
||||
opal_bitmap_set_bit(reachable, i);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"add_procs: rank=%x nid=%x pid=%x for NI %d\n",
|
||||
"add_procs: rank=%lx nid=%x pid=%x for NI %d",
|
||||
i,
|
||||
btl_peer_data[i]->ptl_proc.phys.nid,
|
||||
btl_peer_data[i]->ptl_proc.phys.pid,
|
||||
@ -591,7 +590,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
ret = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
|
||||
*size = max_data;
|
||||
if ( ret < 0 ) {
|
||||
if (ret < 0) {
|
||||
mca_btl_portals4_free(btl_base, (mca_btl_base_descriptor_t *) frag);
|
||||
return NULL;
|
||||
}
|
||||
@ -624,53 +623,52 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
|
||||
}
|
||||
|
||||
handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
|
||||
handle->remote_offset = 0;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld\n",
|
||||
portals4_btl->interface_num, base, size, (void *)handle, handle->key));
|
||||
"mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld flags=%d",
|
||||
portals4_btl->interface_num, base, size, (void *)handle, handle->key, flags));
|
||||
|
||||
if (MCA_BTL_FLAGS_PUT == flags) {
|
||||
/* create a match entry */
|
||||
me.start = base;
|
||||
me.length = size;
|
||||
me.ct_handle = PTL_CT_NONE;
|
||||
me.min_free = 0;
|
||||
me.uid = PTL_UID_ANY;
|
||||
me.options = PTL_ME_OP_GET | PTL_ME_USE_ONCE |
|
||||
PTL_ME_EVENT_LINK_DISABLE |
|
||||
PTL_ME_EVENT_COMM_DISABLE |
|
||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
/* create a match entry */
|
||||
me.start = base;
|
||||
me.length = size;
|
||||
me.ct_handle = PTL_CT_NONE;
|
||||
me.min_free = 0;
|
||||
me.uid = PTL_UID_ANY;
|
||||
me.options = PTL_ME_OP_GET |
|
||||
PTL_ME_EVENT_LINK_DISABLE |
|
||||
PTL_ME_EVENT_COMM_DISABLE |
|
||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
|
||||
if (mca_btl_portals4_component.use_logical) {
|
||||
me.match_id.rank = endpoint->ptl_proc.rank;
|
||||
} else {
|
||||
me.match_id.phys.nid = endpoint->ptl_proc.phys.nid;
|
||||
me.match_id.phys.pid = endpoint->ptl_proc.phys.pid;
|
||||
}
|
||||
me.match_bits = handle->key;
|
||||
me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK |
|
||||
BTL_PORTALS4_CONTEXT_MASK |
|
||||
BTL_PORTALS4_SOURCE_MASK;
|
||||
me.ignore_bits = 0;
|
||||
|
||||
ret = PtlMEAppend(portals4_btl->portals_ni_h,
|
||||
portals4_btl->recv_idx,
|
||||
&me,
|
||||
PTL_PRIORITY_LIST,
|
||||
handle,
|
||||
&(handle->me_h));
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, opal_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEAppend failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n",
|
||||
(void *)handle, handle->me_h, me.start, me.length,
|
||||
me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits));
|
||||
if (mca_btl_portals4_component.use_logical) {
|
||||
me.match_id.rank = endpoint->ptl_proc.rank;
|
||||
} else {
|
||||
me.match_id.phys.nid = endpoint->ptl_proc.phys.nid;
|
||||
me.match_id.phys.pid = endpoint->ptl_proc.phys.pid;
|
||||
}
|
||||
me.match_bits = handle->key;
|
||||
me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK |
|
||||
BTL_PORTALS4_CONTEXT_MASK |
|
||||
BTL_PORTALS4_SOURCE_MASK;
|
||||
me.ignore_bits = 0;
|
||||
|
||||
ret = PtlMEAppend(portals4_btl->portals_ni_h,
|
||||
portals4_btl->recv_idx,
|
||||
&me,
|
||||
PTL_PRIORITY_LIST,
|
||||
handle,
|
||||
&(handle->me_h));
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, opal_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEAppend failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n",
|
||||
(void *)handle, handle->me_h, me.start, me.length,
|
||||
me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits));
|
||||
return handle;
|
||||
}
|
||||
|
||||
@ -678,11 +676,22 @@ int
|
||||
mca_btl_portals4_deregister_mem(mca_btl_base_module_t *btl_base,
|
||||
mca_btl_base_registration_handle_t *handle)
|
||||
{
|
||||
int ret;
|
||||
struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld\n",
|
||||
portals4_btl->interface_num, (void *)handle, handle->key));
|
||||
"mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld me_h=%d\n",
|
||||
portals4_btl->interface_num, (void *)handle, handle->key, handle->me_h));
|
||||
|
||||
if (!PtlHandleIsEqual(handle->me_h, PTL_INVALID_HANDLE)) {
|
||||
ret = PtlMEUnlink(handle->me_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, opal_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEUnlink failed: %d\n",__FILE__, __LINE__, ret);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
handle->me_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
free(handle);
|
||||
|
||||
|
@ -79,6 +79,9 @@ struct mca_btl_portals4_component_t {
|
||||
|
||||
/** Event queue handles table used in PtlEQPoll */
|
||||
ptl_handle_eq_t *eqs_h;
|
||||
|
||||
/** Upper limit for message sizes */
|
||||
unsigned long portals_max_msg_size;
|
||||
};
|
||||
|
||||
typedef struct mca_btl_portals4_component_t mca_btl_portals4_component_t;
|
||||
@ -255,6 +258,8 @@ struct mca_btl_base_registration_handle_t {
|
||||
ptl_match_bits_t key;
|
||||
/** Portals4 me_h */
|
||||
ptl_handle_me_t me_h;
|
||||
/** Remote offset */
|
||||
ptl_size_t remote_offset;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -201,6 +201,18 @@ mca_btl_portals4_component_register(void)
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&(mca_btl_portals4_component.portals_recv_mds_size));
|
||||
|
||||
mca_btl_portals4_component.portals_max_msg_size = PTL_SIZE_MAX;
|
||||
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
|
||||
"max_msg_size",
|
||||
"Max size supported by portals4 (above that, a message is cut into messages less than that size)",
|
||||
MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&(mca_btl_portals4_component.portals_max_msg_size));
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -216,6 +228,8 @@ mca_btl_portals4_component_open(void)
|
||||
mca_btl_portals4_module.super.btl_eager_limit = 32 * 1024;
|
||||
mca_btl_portals4_module.super.btl_rndv_eager_limit = 32 * 1024;
|
||||
mca_btl_portals4_module.super.btl_max_send_size = 64 * 1024;
|
||||
if (mca_btl_portals4_module.super.btl_max_send_size > mca_btl_portals4_component.portals_max_msg_size)
|
||||
mca_btl_portals4_module.super.btl_max_send_size = mca_btl_portals4_component.portals_max_msg_size;
|
||||
mca_btl_portals4_module.super.btl_rdma_pipeline_send_length = 64 * 1024;
|
||||
mca_btl_portals4_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
|
||||
mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0;
|
||||
@ -227,6 +241,8 @@ mca_btl_portals4_component_open(void)
|
||||
mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
|
||||
|
||||
mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX;
|
||||
if (mca_btl_portals4_module.super.btl_get_limit > mca_btl_portals4_component.portals_max_msg_size)
|
||||
mca_btl_portals4_module.super.btl_get_limit = mca_btl_portals4_component.portals_max_msg_size;
|
||||
mca_btl_portals4_module.super.btl_put_limit = 0; /* not implemented */
|
||||
mca_btl_portals4_module.super.btl_get_alignment = 0;
|
||||
mca_btl_portals4_module.super.btl_put_alignment = 0;
|
||||
@ -293,6 +309,7 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
|
||||
mca_btl_base_module_t **btls = NULL;
|
||||
unsigned int ret, interface;
|
||||
ptl_handle_ni_t *portals4_nis_h = NULL;
|
||||
ptl_ni_limits_t portals4_ni_limits ;
|
||||
ptl_process_t *ptl_process_ids = NULL;
|
||||
|
||||
opal_output_verbose(50, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_init\n");
|
||||
@ -325,14 +342,14 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
|
||||
PTL_NI_LOGICAL | PTL_NI_MATCHING,
|
||||
PTL_PID_ANY, /* let library assign our pid */
|
||||
NULL, /* no desired limits */
|
||||
NULL, /* actual limits */
|
||||
&portals4_ni_limits, /* actual limits */
|
||||
&portals4_nis_h[*num_btls] /* our interface handle */
|
||||
);
|
||||
else ret = PtlNIInit((1 == mca_btl_portals4_component.max_btls) ? PTL_IFACE_DEFAULT : interface,
|
||||
PTL_NI_PHYSICAL | PTL_NI_MATCHING,
|
||||
PTL_PID_ANY, /* let library assign our pid */
|
||||
NULL, /* no desired limits */
|
||||
NULL, /* actual limits */
|
||||
&portals4_ni_limits, /* actual limits */
|
||||
&portals4_nis_h[*num_btls] /* our interface handle */
|
||||
);
|
||||
if (PTL_OK != ret) {
|
||||
@ -340,7 +357,15 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
|
||||
"%s:%d: PtlNIInit failed for NI %d: %d\n", __FILE__, __LINE__, interface, ret);
|
||||
}
|
||||
else {
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d\n", *num_btls));
|
||||
if (mca_btl_portals4_component.portals_max_msg_size > portals4_ni_limits.max_msg_size)
|
||||
mca_btl_portals4_component.portals_max_msg_size = portals4_ni_limits.max_msg_size;
|
||||
if (mca_btl_portals4_module.super.btl_max_send_size > portals4_ni_limits.max_msg_size)
|
||||
mca_btl_portals4_module.super.btl_max_send_size = portals4_ni_limits.max_msg_size;
|
||||
if (mca_btl_portals4_module.super.btl_get_limit > portals4_ni_limits.max_msg_size)
|
||||
mca_btl_portals4_module.super.btl_get_limit = portals4_ni_limits.max_msg_size;
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d max_msg_size=%ld",
|
||||
*num_btls, mca_btl_portals4_component.portals_max_msg_size));
|
||||
|
||||
(*num_btls)++;
|
||||
}
|
||||
}
|
||||
@ -698,7 +723,7 @@ mca_btl_portals4_component_progress(void)
|
||||
frag->peer_proc,
|
||||
portals4_btl->recv_idx,
|
||||
frag->match_bits, /* match bits */
|
||||
0,
|
||||
0, // Warning : should be ev.remote_offset but it is not defined,
|
||||
frag);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_output_verbose(1, opal_btl_base_framework.framework_output,
|
||||
|
@ -50,7 +50,6 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
|
||||
{
|
||||
mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
|
||||
mca_btl_portals4_frag_t *frag = NULL;
|
||||
ptl_md_t md;
|
||||
int ret;
|
||||
|
||||
/* reserve space in the event queue for rdma operations immediately */
|
||||
@ -83,8 +82,8 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
|
||||
frag->length = size;
|
||||
frag->peer_proc = btl_peer->ptl_proc;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
|
||||
md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet offset=%p length=%ld remote_offset=%p nid=%x pid=%x match_bits=%lx",
|
||||
local_address, size, (void*)local_handle->remote_offset, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));
|
||||
|
||||
ret = PtlGet(portals4_btl->send_md_h,
|
||||
(ptl_size_t) local_address,
|
||||
@ -92,7 +91,7 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
|
||||
btl_peer->ptl_proc,
|
||||
portals4_btl->recv_idx,
|
||||
frag->match_bits, /* match bits */
|
||||
0,
|
||||
local_handle->remote_offset,
|
||||
frag);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_output_verbose(1, opal_btl_base_framework.framework_output,
|
||||
@ -100,8 +99,7 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
|
||||
__FILE__, __LINE__, ret);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
|
||||
md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));
|
||||
local_handle->remote_offset += size;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user