diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index 8f7871423b..b4504d502c 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ b/opal/mca/btl/portals4/btl_portals4.c @@ -99,7 +99,6 @@ btl_portals4_init_interface(void) /* Create recv_idx portal table entry */ ret = PtlPTAlloc(portals4_btl->portals_ni_h, - PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE, portals4_btl->recv_eq_h, REQ_BTL_TABLE_ID, @@ -429,7 +428,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base, opal_bitmap_set_bit(reachable, i); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "add_procs: rank=%x nid=%x pid=%x for NI %d\n", + "add_procs: rank=%lx nid=%x pid=%x for NI %d", i, btl_peer_data[i]->ptl_proc.phys.nid, btl_peer_data[i]->ptl_proc.phys.pid, @@ -591,7 +590,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base, ret = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); *size = max_data; - if ( ret < 0 ) { + if (ret < 0) { mca_btl_portals4_free(btl_base, (mca_btl_base_descriptor_t *) frag); return NULL; } @@ -624,53 +623,52 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base, } handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1); + handle->remote_offset = 0; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld\n", - portals4_btl->interface_num, base, size, (void *)handle, handle->key)); + "mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld flags=%d", + portals4_btl->interface_num, base, size, (void *)handle, handle->key, flags)); - if (MCA_BTL_FLAGS_PUT == flags) { - /* create a match entry */ - me.start = base; - me.length = size; - me.ct_handle = PTL_CT_NONE; - me.min_free = 0; - me.uid = PTL_UID_ANY; - me.options = PTL_ME_OP_GET | PTL_ME_USE_ONCE | - PTL_ME_EVENT_LINK_DISABLE | - PTL_ME_EVENT_COMM_DISABLE | - PTL_ME_EVENT_UNLINK_DISABLE; + /* create a match entry */ + me.start = base; + me.length = size; + me.ct_handle = PTL_CT_NONE; + me.min_free = 0; + me.uid = PTL_UID_ANY; + me.options = PTL_ME_OP_GET | + PTL_ME_EVENT_LINK_DISABLE | + PTL_ME_EVENT_COMM_DISABLE | + PTL_ME_EVENT_UNLINK_DISABLE; - if (mca_btl_portals4_component.use_logical) { - me.match_id.rank = endpoint->ptl_proc.rank; - } else { - me.match_id.phys.nid = endpoint->ptl_proc.phys.nid; - me.match_id.phys.pid = endpoint->ptl_proc.phys.pid; - } - me.match_bits = handle->key; - me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK | - BTL_PORTALS4_CONTEXT_MASK | - BTL_PORTALS4_SOURCE_MASK; - me.ignore_bits = 0; - - ret = PtlMEAppend(portals4_btl->portals_ni_h, - portals4_btl->recv_idx, - &me, - PTL_PRIORITY_LIST, - handle, - &(handle->me_h)); - if (PTL_OK != ret) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: PtlMEAppend failed: %d\n", - __FILE__, __LINE__, ret); - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); - return NULL; - } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n", - (void *)handle, handle->me_h, me.start, me.length, - me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits)); + if (mca_btl_portals4_component.use_logical) { + me.match_id.rank = endpoint->ptl_proc.rank; + } else { + me.match_id.phys.nid = endpoint->ptl_proc.phys.nid; + me.match_id.phys.pid = endpoint->ptl_proc.phys.pid; } + me.match_bits = handle->key; + me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK | + BTL_PORTALS4_CONTEXT_MASK | + BTL_PORTALS4_SOURCE_MASK; + me.ignore_bits = 0; + + ret = PtlMEAppend(portals4_btl->portals_ni_h, + portals4_btl->recv_idx, + &me, + PTL_PRIORITY_LIST, + handle, + &(handle->me_h)); + if (PTL_OK != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: PtlMEAppend failed: %d\n", + __FILE__, __LINE__, ret); + OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + return NULL; + } + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n", + (void *)handle, handle->me_h, me.start, me.length, + me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits)); return handle; } @@ -678,11 +676,22 @@ int mca_btl_portals4_deregister_mem(mca_btl_base_module_t *btl_base, mca_btl_base_registration_handle_t *handle) { + int ret; struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld\n", - portals4_btl->interface_num, (void *)handle, handle->key)); + "mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld me_h=%d\n", + portals4_btl->interface_num, (void *)handle, handle->key, handle->me_h)); + + if (!PtlHandleIsEqual(handle->me_h, PTL_INVALID_HANDLE)) { + ret = PtlMEUnlink(handle->me_h); + if (PTL_OK != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: PtlMEUnlink failed: %d\n",__FILE__, __LINE__, ret); + return OPAL_ERROR; + } + handle->me_h = PTL_INVALID_HANDLE; + } free(handle); diff --git a/opal/mca/btl/portals4/btl_portals4.h b/opal/mca/btl/portals4/btl_portals4.h index 81bc75735b..92c294b80a 100644 --- a/opal/mca/btl/portals4/btl_portals4.h +++ b/opal/mca/btl/portals4/btl_portals4.h @@ -79,6 +79,9 @@ struct mca_btl_portals4_component_t { /** Event queue handles table used in PtlEQPoll */ ptl_handle_eq_t *eqs_h; + + /** Upper limit for message sizes */ + unsigned long portals_max_msg_size; }; typedef struct mca_btl_portals4_component_t mca_btl_portals4_component_t; @@ -255,6 +258,8 @@ struct mca_btl_base_registration_handle_t { ptl_match_bits_t key; /** Portals4 me_h */ ptl_handle_me_t me_h; + /** Remote offset */ + ptl_size_t remote_offset; }; /* diff --git a/opal/mca/btl/portals4/btl_portals4_component.c b/opal/mca/btl/portals4/btl_portals4_component.c index 5834ac4744..eda9cd81f7 100644 --- a/opal/mca/btl/portals4/btl_portals4_component.c +++ b/opal/mca/btl/portals4/btl_portals4_component.c @@ -201,6 +201,18 @@ mca_btl_portals4_component_register(void) OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, &(mca_btl_portals4_component.portals_recv_mds_size)); + + mca_btl_portals4_component.portals_max_msg_size = PTL_SIZE_MAX; + (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version, + "max_msg_size", + "Max size supported by portals4 (above that, a message is cut into messages less than that size)", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, + 0, + 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &(mca_btl_portals4_component.portals_max_msg_size)); return OPAL_SUCCESS; } @@ -216,6 +228,8 @@ mca_btl_portals4_component_open(void) mca_btl_portals4_module.super.btl_eager_limit = 32 * 1024; mca_btl_portals4_module.super.btl_rndv_eager_limit = 32 * 1024; mca_btl_portals4_module.super.btl_max_send_size = 64 * 1024; + if (mca_btl_portals4_module.super.btl_max_send_size > mca_btl_portals4_component.portals_max_msg_size) + mca_btl_portals4_module.super.btl_max_send_size = mca_btl_portals4_component.portals_max_msg_size; mca_btl_portals4_module.super.btl_rdma_pipeline_send_length = 64 * 1024; mca_btl_portals4_module.super.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0; @@ -227,6 +241,8 @@ mca_btl_portals4_component_open(void) mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX; + if (mca_btl_portals4_module.super.btl_get_limit > mca_btl_portals4_component.portals_max_msg_size) + mca_btl_portals4_module.super.btl_get_limit = mca_btl_portals4_component.portals_max_msg_size; mca_btl_portals4_module.super.btl_put_limit = 0; /* not implemented */ mca_btl_portals4_module.super.btl_get_alignment = 0; mca_btl_portals4_module.super.btl_put_alignment = 0; @@ -293,6 +309,7 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, mca_btl_base_module_t **btls = NULL; unsigned int ret, interface; ptl_handle_ni_t *portals4_nis_h = NULL; + ptl_ni_limits_t portals4_ni_limits ; ptl_process_t *ptl_process_ids = NULL; opal_output_verbose(50, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_init\n"); @@ -325,14 +342,14 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, PTL_NI_LOGICAL | PTL_NI_MATCHING, PTL_PID_ANY, /* let library assign our pid */ NULL, /* no desired limits */ - NULL, /* actual limits */ + &portals4_ni_limits, /* actual limits */ &portals4_nis_h[*num_btls] /* our interface handle */ ); else ret = PtlNIInit((1 == mca_btl_portals4_component.max_btls) ? PTL_IFACE_DEFAULT : interface, PTL_NI_PHYSICAL | PTL_NI_MATCHING, PTL_PID_ANY, /* let library assign our pid */ NULL, /* no desired limits */ - NULL, /* actual limits */ + &portals4_ni_limits, /* actual limits */ &portals4_nis_h[*num_btls] /* our interface handle */ ); if (PTL_OK != ret) { @@ -340,7 +357,15 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, "%s:%d: PtlNIInit failed for NI %d: %d\n", __FILE__, __LINE__, interface, ret); } else { - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d\n", *num_btls)); + if (mca_btl_portals4_component.portals_max_msg_size > portals4_ni_limits.max_msg_size) + mca_btl_portals4_component.portals_max_msg_size = portals4_ni_limits.max_msg_size; + if (mca_btl_portals4_module.super.btl_max_send_size > portals4_ni_limits.max_msg_size) + mca_btl_portals4_module.super.btl_max_send_size = portals4_ni_limits.max_msg_size; + if (mca_btl_portals4_module.super.btl_get_limit > portals4_ni_limits.max_msg_size) + mca_btl_portals4_module.super.btl_get_limit = portals4_ni_limits.max_msg_size; + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d max_msg_size=%ld", + *num_btls, mca_btl_portals4_component.portals_max_msg_size)); + (*num_btls)++; } } @@ -698,7 +723,7 @@ mca_btl_portals4_component_progress(void) frag->peer_proc, portals4_btl->recv_idx, frag->match_bits, /* match bits */ - 0, + 0, // Warning : should be ev.remote_offset but it is not defined, frag); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, opal_btl_base_framework.framework_output, diff --git a/opal/mca/btl/portals4/btl_portals4_rdma.c b/opal/mca/btl/portals4/btl_portals4_rdma.c index be915d59b9..33fb9ab326 100644 --- a/opal/mca/btl/portals4/btl_portals4_rdma.c +++ b/opal/mca/btl/portals4/btl_portals4_rdma.c @@ -50,7 +50,6 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, { mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base; mca_btl_portals4_frag_t *frag = NULL; - ptl_md_t md; int ret; /* reserve space in the event queue for rdma operations immediately */ @@ -83,8 +82,8 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, frag->length = size; frag->peer_proc = btl_peer->ptl_proc; - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n", - md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits)); + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet offset=%p length=%ld remote_offset=%p nid=%x pid=%x match_bits=%lx", + local_address, size, (void*)local_handle->remote_offset, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits)); ret = PtlGet(portals4_btl->send_md_h, (ptl_size_t) local_address, @@ -92,7 +91,7 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, btl_peer->ptl_proc, portals4_btl->recv_idx, frag->match_bits, /* match bits */ - 0, + local_handle->remote_offset, frag); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, opal_btl_base_framework.framework_output, @@ -100,8 +99,7 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, __FILE__, __LINE__, ret); return OPAL_ERROR; } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n", - md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits)); + local_handle->remote_offset += size; return OPAL_SUCCESS; }