From bca2522d8b7632e32cb9e7def384a9d6c01c1f29 Mon Sep 17 00:00:00 2001 From: Todd Kordenbrock Date: Fri, 10 Apr 2015 14:39:20 -0500 Subject: [PATCH 1/2] btl-portals4: fix send failure for messages larger than eager_size In the large message case, the sender issues a PtlMEAppend() in order to generate events when the receiver issues a PtlGet(). This commit moves the PtlMEAppend() from mca_btl_portals4_prepare_src() to mca_btl_portals4_register_mem() which is the way it's done in BTL 3.0. --- opal/mca/btl/portals4/btl_portals4.c | 87 ++++++++++++----------- opal/mca/btl/portals4/btl_portals4.h | 2 + opal/mca/btl/portals4/btl_portals4_rdma.c | 12 ++-- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index 90eb8d9f72..7b04fecb40 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ b/opal/mca/btl/portals4/btl_portals4.c @@ -327,7 +327,6 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base, } else { /* no need to pack - rdma operation out of user's buffer */ - ptl_me_t me; /* reserve space in the event queue for rdma operations immediately */ while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) > @@ -366,48 +365,6 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base, (unsigned long) frag, (unsigned long) frag->base.des_cbfunc, frag->segments[0].key, flags)); - - /* create a match entry */ - me.start = frag->segments[0].base.seg_addr.pval; - me.length = frag->segments[0].base.seg_len; - me.ct_handle = PTL_CT_NONE; - me.min_free = 0; - me.uid = PTL_UID_ANY; - me.options = PTL_ME_OP_GET | PTL_ME_USE_ONCE | - PTL_ME_EVENT_LINK_DISABLE | - PTL_ME_EVENT_COMM_DISABLE | - PTL_ME_EVENT_UNLINK_DISABLE; - - if (mca_btl_portals4_component.use_logical) { - me.match_id.rank = peer->ptl_proc.rank; - } else { - me.match_id.phys.nid = peer->ptl_proc.phys.nid; - me.match_id.phys.pid = peer->ptl_proc.phys.pid; - } - me.match_bits = frag->segments[0].key; - me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK | - BTL_PORTALS4_CONTEXT_MASK | - BTL_PORTALS4_SOURCE_MASK; - me.ignore_bits = 0; - - ret = PtlMEAppend(portals4_btl->portals_ni_h, - portals4_btl->recv_idx, - &me, - PTL_PRIORITY_LIST, - frag, - &(frag->me_h)); - if (PTL_OK != ret) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: PtlMEAppend failed: %d\n", - __FILE__, __LINE__, ret); - OPAL_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag); - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); - return NULL; - } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "PtlMEAppend (prepare_src) frag=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n", - (void *)frag, frag->me_h, me.start, me.length, - me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits)); } frag->base.des_segments = &frag->segments[0].base; @@ -425,6 +382,8 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base, { struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base; mca_btl_base_registration_handle_t *handle = NULL; + ptl_me_t me; + int ret; handle = (mca_btl_base_registration_handle_t *)malloc(sizeof(mca_btl_base_registration_handle_t)); if (!handle) { @@ -437,6 +396,48 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base, "mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld\n", portals4_btl->interface_num, base, size, (void *)handle, handle->key)); + if (MCA_BTL_FLAGS_PUT == flags) { + /* create a match entry */ + me.start = base; + me.length = size; + me.ct_handle = PTL_CT_NONE; + me.min_free = 0; + me.uid = PTL_UID_ANY; + me.options = PTL_ME_OP_GET | PTL_ME_USE_ONCE | + PTL_ME_EVENT_LINK_DISABLE | + PTL_ME_EVENT_COMM_DISABLE | + PTL_ME_EVENT_UNLINK_DISABLE; + + if (mca_btl_portals4_component.use_logical) { + me.match_id.rank = endpoint->ptl_proc.rank; + } else { + me.match_id.phys.nid = endpoint->ptl_proc.phys.nid; + me.match_id.phys.pid = endpoint->ptl_proc.phys.pid; + } + me.match_bits = handle->key; + me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK | + BTL_PORTALS4_CONTEXT_MASK | + BTL_PORTALS4_SOURCE_MASK; + me.ignore_bits = 0; + + ret = PtlMEAppend(portals4_btl->portals_ni_h, + portals4_btl->recv_idx, + &me, + PTL_PRIORITY_LIST, + handle, + &(handle->me_h)); + if (PTL_OK != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: PtlMEAppend failed: %d\n", + __FILE__, __LINE__, ret); + OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + return NULL; + } + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n", + (void *)handle, handle->me_h, me.start, me.length, + me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits)); + } return handle; } diff --git a/opal/mca/btl/portals4/btl_portals4.h b/opal/mca/btl/portals4/btl_portals4.h index 7a234eab7d..83c31c43fc 100644 --- a/opal/mca/btl/portals4/btl_portals4.h +++ b/opal/mca/btl/portals4/btl_portals4.h @@ -284,6 +284,8 @@ int mca_btl_portals4_get_error(int ptl_error); struct mca_btl_base_registration_handle_t { /** Portals4 match bits */ ptl_match_bits_t key; + /** Portals4 me_h */ + ptl_handle_me_t me_h; }; /* diff --git a/opal/mca/btl/portals4/btl_portals4_rdma.c b/opal/mca/btl/portals4/btl_portals4_rdma.c index dde1f81442..76898a41e1 100644 --- a/opal/mca/btl/portals4/btl_portals4_rdma.c +++ b/opal/mca/btl/portals4/btl_portals4_rdma.c @@ -67,10 +67,8 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, return OPAL_ERROR; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "mca_btl_portals4_prepare_src: Incrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops)); - - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "mca_btl_portals4_get frag=%p\n", (void *)frag)); + "mca_btl_portals4_get: Incrementing portals_outstanding_ops=%d frag=%p", + portals4_btl->portals_outstanding_ops, (void *)frag)); frag->rdma_cb.func = cbfunc; frag->rdma_cb.context = cbcontext; @@ -101,6 +99,10 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, frag->match_bits = remote_handle->key; frag->length = md.length; frag->peer_proc = btl_peer->ptl_proc; + + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n", + md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits)); + ret = PtlGet(frag->md_h, 0, md.length, @@ -117,7 +119,7 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, frag->md_h = PTL_INVALID_HANDLE; return OPAL_ERROR; } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n", + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n", md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits)); return OPAL_SUCCESS; From b1cef6c3ea912663962edfc680dfc768702da532 Mon Sep 17 00:00:00 2001 From: Todd Kordenbrock Date: Tue, 14 Apr 2015 08:24:40 -0500 Subject: [PATCH 2/2] btl-portals4: remove unused code path The Portals4 BTL is registered with the PML as an RDMA BTL, so prepare_src() is only used in limited cases. This commit removes the code path from prepare_src() for unbuffered contiguous buffers with no PML reserve. This is now handled in register_mem(). --- opal/mca/btl/portals4/btl_portals4.c | 41 ---------------------------- 1 file changed, 41 deletions(-) diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index 7b04fecb40..0811d87244 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ b/opal/mca/btl/portals4/btl_portals4.c @@ -324,47 +324,6 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base, frag->segments[0].base.seg_len = max_data + reserve; frag->base.des_segment_count = 1; - - } else { - /* no need to pack - rdma operation out of user's buffer */ - - /* reserve space in the event queue for rdma operations immediately */ - while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) > - portals4_btl->portals_max_outstanding_ops) { - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n")); - mca_btl_portals4_component_progress(); - } - - OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag); - if (NULL == frag){ - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); - return NULL; - } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "mca_btl_portals4_prepare_src: Incrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops)); - - iov.iov_len = max_data; - iov.iov_base = NULL; - - ret = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); - if ( OPAL_UNLIKELY(ret < 0) ) { - OPAL_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag); - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); - return NULL; - } - - frag->segments[0].base.seg_len = max_data; - frag->segments[0].base.seg_addr.pval = iov.iov_base; - frag->segments[0].key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1); - frag->base.des_segment_count = 1; - - /* either a put or get. figure out which later */ - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "rdma src posted for frag 0x%lx, callback 0x%lx, bits %"PRIu64", flags say %d" , - (unsigned long) frag, - (unsigned long) frag->base.des_cbfunc, - frag->segments[0].key, flags)); } frag->base.des_segments = &frag->segments[0].base;