diff --git a/opal/mca/btl/vader/btl_vader_atomic.c b/opal/mca/btl/vader/btl_vader_atomic.c
index df3c2664be..1bf54430df 100644
--- a/opal/mca/btl/vader/btl_vader_atomic.c
+++ b/opal/mca/btl/vader/btl_vader_atomic.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights
  *                         reserved.
+ * Copyright (c) 2019      Google, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -16,58 +17,14 @@
 #include "btl_vader_endpoint.h"
 #include "btl_vader_xpmem.h"
 
-static void mca_btl_vader_sc_emu_aop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
-                                               mca_btl_base_descriptor_t *desc, int status)
-{
-    mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
-    void *local_address = frag->rdma.local_address;
-    void *context = frag->rdma.context;
-    void *cbdata = frag->rdma.cbdata;
-    mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
-
-    /* return the fragment first since the callback may call put/get/amo and could use this fragment */
-    MCA_BTL_VADER_FRAG_RETURN(frag);
-
-    cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
-}
-
 int mca_btl_vader_emu_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                            uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
                            mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
                            mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
 {
-    mca_btl_vader_frag_t *frag;
-
-    frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags, NULL,
-                                          remote_address, cbfunc, cbcontext, cbdata, mca_btl_vader_sc_emu_aop_complete);
-    if (OPAL_UNLIKELY(NULL == frag)) {
-        return OPAL_ERR_OUT_OF_RESOURCE;
-    }
-
-    /* send is always successful */
-    (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
-
-    return OPAL_SUCCESS;
-}
-
-static void mca_btl_vader_sc_emu_afop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
-                                                mca_btl_base_descriptor_t *desc, int status)
-{
-    mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
-    mca_btl_vader_sc_emu_hdr_t *hdr;
-    void *local_address = frag->rdma.local_address;
-    void *context = frag->rdma.context;
-    void *cbdata = frag->rdma.cbdata;
-    mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
-
-    hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
-
-    *((int64_t *) frag->rdma.local_address) = hdr->operand[0];
-
-    /* return the fragment first since the callback may call put/get/amo and could use this fragment */
-    MCA_BTL_VADER_FRAG_RETURN(frag);
-
-    cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
+    size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
+    return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags,
+                                          size, NULL, remote_address, cbfunc, cbcontext, cbdata);
 }
 
 int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
@@ -76,19 +33,9 @@ int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_ba
                             uint64_t operand, int flags, int order,
                             mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
 {
-    mca_btl_vader_frag_t *frag;
-
-    frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags,
-                                          local_address, remote_address, cbfunc, cbcontext, cbdata,
-                                          mca_btl_vader_sc_emu_afop_complete);
-    if (OPAL_UNLIKELY(NULL == frag)) {
-        return OPAL_ERR_OUT_OF_RESOURCE;
-    }
-
-    /* send is always successful */
-    (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
-
-    return OPAL_SUCCESS;
+    size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
+    return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags,
+                                          size, local_address, remote_address, cbfunc, cbcontext, cbdata);
 }
 
 int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
@@ -96,17 +43,7 @@ int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_
                               mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
                               int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
 {
-    mca_btl_vader_frag_t *frag;
-
-    frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, 0, order,
-                                          flags, local_address, remote_address, cbfunc, cbcontext, cbdata,
-                                          mca_btl_vader_sc_emu_afop_complete);
-    if (OPAL_UNLIKELY(NULL == frag)) {
-        return OPAL_ERR_OUT_OF_RESOURCE;
-    }
-
-    /* send is always successful */
-    (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
-
-    return OPAL_SUCCESS;
+    size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
+    return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, order,
+                                          flags, size, local_address, remote_address, cbfunc, cbcontext, cbdata);
 }
diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c
index 5a163f637a..98ac462b71 100644
--- a/opal/mca/btl/vader/btl_vader_component.c
+++ b/opal/mca/btl/vader/btl_vader_component.c
@@ -21,6 +21,7 @@
  * Copyright (c) 2018      Amazon.com, Inc. or its affiliates. All Rights reserved.
  * Copyright (c) 2018      Triad National Security, LLC. All rights
  *                         reserved.
+ * Copyright (c) 2019      Google, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -477,12 +478,6 @@ static void mca_btl_vader_check_single_copy (void)
         mca_btl_vader.super.btl_get = NULL;
         mca_btl_vader.super.btl_put = NULL;
     }
-
-    if (MCA_BTL_VADER_EMUL == mca_btl_vader_component.single_copy_mechanism) {
-        /* limit to the maximum fragment size */
-        mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
-        mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
-    }
 }
 
 /*
diff --git a/opal/mca/btl/vader/btl_vader_frag.h b/opal/mca/btl/vader/btl_vader_frag.h
index d3ce21dae3..2de2691481 100644
--- a/opal/mca/btl/vader/btl_vader_frag.h
+++ b/opal/mca/btl/vader/btl_vader_frag.h
@@ -14,6 +14,7 @@
  * Copyright (c) 2009      Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
  *                         reserved.
+ * Copyright (c) 2019      Google, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -91,9 +92,12 @@ struct mca_btl_vader_frag_t {
     /** rdma callback data */
     struct mca_btl_vader_rdma_cbdata_t {
         void *local_address;
+        uint64_t remote_address;
         mca_btl_base_rdma_completion_fn_t cbfunc;
         void *context;
         void *cbdata;
+        size_t remaining;
+        size_t sent;
     } rdma;
 };
 
@@ -151,28 +155,114 @@ static inline void mca_btl_vader_frag_complete (mca_btl_vader_frag_t *frag) {
 
 int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx);
 
-static inline mca_btl_vader_frag_t *
-mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type,
-                               uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, int order,
-                               int flags, size_t size, void *local_address, int64_t remote_address,
-                               mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext,
-                               void *cbdata, mca_btl_base_completion_fn_t des_cbfunc)
+/**
+ * @brief Progress an emulated single-copy operation (put, get, or atomic)
+ *
+ * Invoked once by mca_btl_vader_rdma_frag_start() and afterwards through frag->base.des_cbfunc.
+ * Copies out any data carried back by the previous packet, then either sends the next packet or,
+ * when nothing remains, returns the fragment and calls the user's completion callback.
+ *
+ * @param[in] btl       btl module
+ * @param[in] endpoint  endpoint the operation is directed at
+ * @param[in] frag      fragment holding the operation state (frag->rdma)
+ * @param[in] status    status passed through to the completion callback
+ */
+static inline void mca_btl_vader_rdma_frag_advance (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
+                                                    mca_btl_vader_frag_t *frag, int status)
 {
-    mca_btl_vader_sc_emu_hdr_t *hdr;
-    size_t total_size = size + sizeof (*hdr);
-    mca_btl_vader_frag_t *frag;
+    mca_btl_vader_sc_emu_hdr_t *hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
+    mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
+    size_t hdr_size = sizeof (*hdr);
+    size_t len = frag->rdma.sent ? frag->segments[0].seg_len - hdr_size : 0;
+    void *context = frag->rdma.context;
+    void *cbdata = frag->rdma.cbdata;
+    void *data = (void *) (hdr + 1);
+    void *local_address;
 
-    frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, total_size,
-                                                         MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
-    if (OPAL_UNLIKELY(NULL == frag)) {
-        return NULL;
+    if (frag->rdma.sent) {
+        if (MCA_BTL_VADER_OP_GET == hdr->type) {
+            memcpy (frag->rdma.local_address, data, len);
+        } else if ((MCA_BTL_VADER_OP_ATOMIC == hdr->type || MCA_BTL_VADER_OP_CSWAP == hdr->type) &&
+                   frag->rdma.local_address) {
+            if (8 == len) {
+                *((int64_t *) frag->rdma.local_address) = hdr->operand[0];
+            } else {
+                *((int32_t *) frag->rdma.local_address) = (int32_t) hdr->operand[0];
+            }
+        }
     }
 
-    frag->base.des_cbfunc = des_cbfunc;
+    if (frag->rdma.remaining) {
+        size_t packet_size = (frag->rdma.remaining + hdr_size) <= mca_btl_vader.super.btl_max_send_size ?
+            frag->rdma.remaining : mca_btl_vader.super.btl_max_send_size - hdr_size;
+
+        /* advance the local and remote pointers */
+        frag->rdma.local_address = (void *)((uintptr_t) frag->rdma.local_address + len);
+        frag->rdma.remote_address += len;
+
+        if (MCA_BTL_VADER_OP_PUT == hdr->type) {
+            /* copy the next block into the fragment buffer */
+            memcpy ((void *) (hdr + 1), frag->rdma.local_address, packet_size);
+        }
+
+        hdr->addr = frag->rdma.remote_address;
+        /* clear out the complete flag before sending the fragment again */
+        frag->hdr->flags &= ~MCA_BTL_VADER_FLAG_COMPLETE;
+        frag->segments[0].seg_len = packet_size + sizeof (*hdr);
+        frag->rdma.sent += packet_size;
+        frag->rdma.remaining -= packet_size;
+
+        /* send is always successful */
+        (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
+        return;
+    }
+
+    /* The fragment must not be touched once it has been returned so compute the user's original
+     * local address first. local_address has only been advanced past the packets that preceded
+     * the final one (sent - len bytes). */
+    local_address = (void *)((uintptr_t) frag->rdma.local_address - (frag->rdma.sent - len));
+
+    /* return the fragment before calling the callback */
+    MCA_BTL_VADER_FRAG_RETURN(frag);
+    cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
+}
+
+/**
+ * @brief Start an emulated single-copy operation (put, get, or atomic)
+ *
+ * Allocates a fragment large enough for one packet (at most btl_max_send_size including the
+ * emulation header), records the operation state in frag->rdma, fills in the header, and sends
+ * the first packet via mca_btl_vader_rdma_frag_advance().
+ *
+ * @returns OPAL_SUCCESS if the operation was started
+ * @returns OPAL_ERR_OUT_OF_RESOURCE if a fragment could not be allocated
+ */
+static inline int
+mca_btl_vader_rdma_frag_start (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type,
+                               uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, int order,
+                               int flags, size_t size, void *local_address, int64_t remote_address,
+                               mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
+{
+    mca_btl_vader_sc_emu_hdr_t *hdr;
+    size_t hdr_size = sizeof (*hdr);
+    size_t packet_size = (size + hdr_size) <= mca_btl_vader.super.btl_max_send_size ? size :
+        mca_btl_vader.super.btl_max_send_size - hdr_size;
+    mca_btl_vader_frag_t *frag;
+
+    frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, packet_size + hdr_size,
+                                                         MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
+    if (OPAL_UNLIKELY(NULL == frag)) {
+        return OPAL_ERR_OUT_OF_RESOURCE;
+    }
+
+    frag->base.des_cbfunc = (mca_btl_base_completion_fn_t) mca_btl_vader_rdma_frag_advance;
     frag->rdma.local_address = local_address;
+    frag->rdma.remote_address = remote_address;
     frag->rdma.cbfunc = cbfunc;
     frag->rdma.context = cbcontext;
     frag->rdma.cbdata = cbdata;
+    frag->rdma.remaining = size;
+    frag->rdma.sent = 0;
 
     hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
 
@@ -183,7 +273,8 @@ mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint
     hdr->operand[0] = operand1;
     hdr->operand[1] = operand2;
 
-    return frag;
+    mca_btl_vader_rdma_frag_advance (btl, endpoint, frag, OPAL_SUCCESS);
+    return OPAL_SUCCESS;
 }
 
 #endif /* MCA_BTL_VADER_SEND_FRAG_H */
diff --git a/opal/mca/btl/vader/btl_vader_get.c b/opal/mca/btl/vader/btl_vader_get.c
index db4b678ef1..a71203f1ad 100644
--- a/opal/mca/btl/vader/btl_vader_get.c
+++ b/opal/mca/btl/vader/btl_vader_get.c
@@ -4,6 +4,7 @@
  *                         reserved.
  * Copyright (c) 2018      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2019      Google, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -156,49 +157,15 @@ int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
 }
 #endif
 
-static void mca_btl_vader_sc_emu_get_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
-                                               mca_btl_base_descriptor_t *desc, int status)
-{
-    mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
-    mca_btl_vader_sc_emu_hdr_t *hdr;
-    void *local_address = frag->rdma.local_address;
-    size_t len = frag->segments[0].seg_len - sizeof (*hdr);
-    void *context = frag->rdma.context;
-    void *cbdata = frag->rdma.cbdata;
-    mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
-    void *data;
-
-    hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
-    data = (void *) (hdr + 1);
-
-    memcpy (local_address, data, len);
-
-    /* return the fragment before calling the callback */
-    MCA_BTL_VADER_FRAG_RETURN(frag);
-
-    cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
-}
-
 int mca_btl_vader_get_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
                               uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                               mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                               int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
 {
-    mca_btl_vader_frag_t *frag;
-
     if (size > mca_btl_vader.super.btl_get_limit) {
         return OPAL_ERR_NOT_AVAILABLE;
     }
 
-    frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size,
-                                          local_address, remote_address, cbfunc, cbcontext, cbdata,
-                                          mca_btl_vader_sc_emu_get_complete);
-    if (OPAL_UNLIKELY(NULL == frag)) {
-        return OPAL_ERR_OUT_OF_RESOURCE;
-    }
-
-    /* send is always successful */
-    (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
-
-    return OPAL_SUCCESS;
+    return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size,
+                                          local_address, remote_address, cbfunc, cbcontext, cbdata);
 }
diff --git a/opal/mca/btl/vader/btl_vader_put.c b/opal/mca/btl/vader/btl_vader_put.c
index 0224d2fe19..8e47a70332 100644
--- a/opal/mca/btl/vader/btl_vader_put.c
+++ b/opal/mca/btl/vader/btl_vader_put.c
@@ -4,6 +4,7 @@
  *                         reserved.
  * Copyright (c) 2014-2018 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2019      Google, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -135,21 +136,6 @@ int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
 }
 #endif
 
-static void mca_btl_vader_sc_emu_put_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
-                                               mca_btl_base_descriptor_t *desc, int status)
-{
-    mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
-    void *local_address = frag->rdma.local_address;
-    void *context = frag->rdma.context;
-    void *cbdata = frag->rdma.cbdata;
-    mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
-
-    /* return the fragment first since the callback may call put/get/amo and could use this fragment */
-    MCA_BTL_VADER_FRAG_RETURN(frag);
-
-    cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
-}
-
 /**
  * @brief Provides an emulated put path which uses copy-in copy-out with shared memory buffers
  */
@@ -158,26 +144,10 @@ int mca_btl_vader_put_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_
                               mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                               int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
 {
-    mca_btl_vader_sc_emu_hdr_t *hdr;
-    mca_btl_vader_frag_t *frag;
-
     if (size > mca_btl_vader.super.btl_put_limit) {
         return OPAL_ERR_NOT_AVAILABLE;
     }
 
-    frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size,
-                                          local_address, remote_address, cbfunc, cbcontext, cbdata,
-                                          mca_btl_vader_sc_emu_put_complete);
-    if (OPAL_UNLIKELY(NULL == frag)) {
-        return OPAL_ERR_OUT_OF_RESOURCE;
-    }
-
-    hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
-
-    memcpy ((void *) (hdr + 1), local_address, size);
-
-    /* send is always successful */
-    (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
-
-    return OPAL_SUCCESS;
+    return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size,
+                                          local_address, remote_address, cbfunc, cbcontext, cbdata);
 }