diff --git a/opal/mca/btl/ugni/btl_ugni_frag.h b/opal/mca/btl/ugni/btl_ugni_frag.h index c912b9abc5..2b04564c61 100644 --- a/opal/mca/btl/ugni/btl_ugni_frag.h +++ b/opal/mca/btl/ugni/btl_ugni_frag.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2013 The University of Tennessee and The University @@ -66,6 +66,7 @@ struct mca_btl_ugni_base_frag_t; typedef struct mca_btl_ugni_base_frag_t { mca_btl_base_descriptor_t base; + volatile int32_t ref_cnt; uint32_t msg_id; uint16_t hdr_size; uint16_t flags; @@ -148,6 +149,7 @@ static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep, if (OPAL_LIKELY(NULL != *frag)) { (*frag)->my_list = list; (*frag)->endpoint = ep; + (*frag)->ref_cnt = 1; return OPAL_SUCCESS; } @@ -169,10 +171,16 @@ static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag) return OPAL_SUCCESS; } -static inline void mca_btl_ugni_frag_complete (mca_btl_ugni_base_frag_t *frag, int rc) { - frag->flags |= MCA_BTL_UGNI_FRAG_COMPLETE; +static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, int rc) { + int32_t ref_cnt; - BTL_VERBOSE(("frag complete. 
flags = %d", frag->base.des_flags)); + opal_atomic_mb (); + + ref_cnt = OPAL_THREAD_ADD32(&frag->ref_cnt, -1); + if (ref_cnt) { + assert (ref_cnt > 0); + return false; + } /* call callback if specified */ if (frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { @@ -182,6 +190,20 @@ static inline void mca_btl_ugni_frag_complete (mca_btl_ugni_base_frag_t *frag, i if (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) { mca_btl_ugni_frag_return (frag); } + + return true; +} + +static inline void mca_btl_ugni_frag_complete (mca_btl_ugni_base_frag_t *frag, int rc) { + BTL_VERBOSE(("frag complete. flags = %d", frag->base.des_flags)); + + frag->flags |= MCA_BTL_UGNI_FRAG_COMPLETE; + + mca_btl_ugni_frag_del_ref (frag, rc); +} + +static inline bool mca_btl_ugni_frag_check_complete (mca_btl_ugni_base_frag_t *frag) { + return !!(MCA_BTL_UGNI_FRAG_COMPLETE & frag->flags); } #define MCA_BTL_UGNI_FRAG_ALLOC_SMSG(ep, frag) \ diff --git a/opal/mca/btl/ugni/btl_ugni_send.c b/opal/mca/btl/ugni/btl_ugni_send.c index cc98272902..45e17ec13f 100644 --- a/opal/mca/btl/ugni/btl_ugni_send.c +++ b/opal/mca/btl/ugni/btl_ugni_send.c @@ -25,7 +25,6 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl, mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) descriptor; size_t size = frag->segments[0].seg_len + frag->segments[1].seg_len; mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; - int flags_save = frag->base.des_flags; int rc; /* tag and len are at the same location in eager and smsg frag hdrs */ @@ -43,34 +42,38 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl, BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. 
length = %" PRIu64, (void *)descriptor, OPAL_PROC_MY_NAME.vpid, endpoint->common->ep_rem_id, size)); - /* temporarily disable ownership and callback flags so we can reliably check the complete flag */ - frag->base.des_flags &= ~(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); + /* add a reference to prevent the fragment from being returned until after the + * completion flag is checked. */ + ++frag->ref_cnt; frag->flags &= ~MCA_BTL_UGNI_FRAG_COMPLETE; rc = mca_btl_ugni_send_frag (endpoint, frag); - - if (OPAL_LIKELY(frag->flags & MCA_BTL_UGNI_FRAG_COMPLETE)) { + if (OPAL_LIKELY(mca_btl_ugni_frag_check_complete (frag))) { /* fast path: remote side has received the frag */ - frag->base.des_flags = flags_save; - mca_btl_ugni_frag_complete (frag, OPAL_SUCCESS); + (void) mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS); return 1; } - if ((OPAL_SUCCESS == rc) && (frag->flags & MCA_BTL_UGNI_FRAG_BUFFERED) && (flags_save & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) { + if ((OPAL_SUCCESS == rc) && (frag->flags & MCA_BTL_UGNI_FRAG_BUFFERED) && (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) { /* fast(ish) path: btl owned buffered frag. report send as complete */ - frag->base.des_flags = flags_save & ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + bool call_callback = !!(frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK); + frag->base.des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK; /* descriptor flags live in base.des_flags; cleared so the callback made below is not repeated when the last reference is dropped */ - if (OPAL_LIKELY(flags_save & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) { + if (call_callback) { frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc); } + (void) mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS); + return 1; } /* slow(ish) path: remote side hasn't received the frag. 
call the frag's callback when we get the local smsg/msgq or remote rdma completion */ - frag->base.des_flags = flags_save | MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + + mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS); if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) { /* queue up request */