btl/vader: when using single-copy emulation fragment large rdma
This commit changes how the single-copy emulation in the vader btl operates. Before this change the BTL set its put and get limits based on the max send size. After this change the limits are unset and the put or get operation is fragmented internally. References #6568 Signed-off-by: Nathan Hjelm <hjelmn@google.com>
Этот коммит содержится в:
родитель
5ff6cb6e6a
Коммит
ae91b11de2
@ -2,6 +2,7 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -16,58 +17,14 @@
|
||||
#include "btl_vader_endpoint.h"
|
||||
#include "btl_vader_xpmem.h"
|
||||
|
||||
static void mca_btl_vader_sc_emu_aop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_base_descriptor_t *desc, int status)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
|
||||
void *local_address = frag->rdma.local_address;
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
|
||||
/* return the fragment first since the callback may call put/get/amo and could use this fragment */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
|
||||
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
|
||||
}
|
||||
|
||||
int mca_btl_vader_emu_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
|
||||
mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags, NULL,
|
||||
remote_address, cbfunc, cbcontext, cbdata, mca_btl_vader_sc_emu_aop_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static void mca_btl_vader_sc_emu_afop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_base_descriptor_t *desc, int status)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
void *local_address = frag->rdma.local_address;
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
|
||||
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
|
||||
*((int64_t *) frag->rdma.local_address) = hdr->operand[0];
|
||||
|
||||
/* return the fragment first since the callback may call put/get/amo and could use this fragment */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
|
||||
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
|
||||
size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags,
|
||||
size, NULL, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
||||
int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
@ -76,19 +33,9 @@ int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_ba
|
||||
uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata,
|
||||
mca_btl_vader_sc_emu_afop_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags,
|
||||
size, local_address, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
||||
int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
@ -96,17 +43,7 @@ int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_
|
||||
mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, 0, order,
|
||||
flags, local_address, remote_address, cbfunc, cbcontext, cbdata,
|
||||
mca_btl_vader_sc_emu_afop_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, order,
|
||||
flags, size, local_address, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
@ -21,6 +21,7 @@
|
||||
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
|
||||
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -478,12 +479,6 @@ static void mca_btl_vader_check_single_copy (void)
|
||||
mca_btl_vader.super.btl_get = NULL;
|
||||
mca_btl_vader.super.btl_put = NULL;
|
||||
}
|
||||
|
||||
if (MCA_BTL_VADER_EMUL == mca_btl_vader_component.single_copy_mechanism) {
|
||||
/* limit to the maximum fragment size */
|
||||
mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
|
||||
mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -91,9 +92,12 @@ struct mca_btl_vader_frag_t {
|
||||
/** rdma callback data */
|
||||
struct mca_btl_vader_rdma_cbdata_t {
|
||||
void *local_address;
|
||||
uint64_t remote_address;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc;
|
||||
void *context;
|
||||
void *cbdata;
|
||||
size_t remaining;
|
||||
size_t sent;
|
||||
} rdma;
|
||||
};
|
||||
|
||||
@ -151,28 +155,87 @@ static inline void mca_btl_vader_frag_complete (mca_btl_vader_frag_t *frag) {
|
||||
|
||||
int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx);
|
||||
|
||||
static inline mca_btl_vader_frag_t *
|
||||
mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type,
|
||||
uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, int order,
|
||||
int flags, size_t size, void *local_address, int64_t remote_address,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext,
|
||||
void *cbdata, mca_btl_base_completion_fn_t des_cbfunc)
|
||||
static inline void mca_btl_vader_rdma_frag_advance (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_vader_frag_t *frag, int status)
|
||||
{
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
size_t total_size = size + sizeof (*hdr);
|
||||
mca_btl_vader_frag_t *frag;
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
size_t hdr_size = sizeof (*hdr);
|
||||
size_t len = frag->rdma.sent ? frag->segments[0].seg_len - hdr_size : 0;
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
void *data = (void *) (hdr + 1);
|
||||
|
||||
frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, total_size,
|
||||
MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
if (frag->rdma.sent) {
|
||||
if (MCA_BTL_VADER_OP_GET == hdr->type) {
|
||||
memcpy (frag->rdma.local_address, data, len);
|
||||
} else if ((MCA_BTL_VADER_OP_ATOMIC == hdr->type || MCA_BTL_VADER_OP_CSWAP == hdr->type) &&
|
||||
frag->rdma.local_address) {
|
||||
if (8 == len) {
|
||||
*((int64_t *) frag->rdma.local_address) = hdr->operand[0];
|
||||
} else {
|
||||
*((int32_t *) frag->rdma.local_address) = (int32_t) hdr->operand[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
frag->base.des_cbfunc = des_cbfunc;
|
||||
if (frag->rdma.remaining) {
|
||||
size_t packet_size = (frag->rdma.remaining + hdr_size) <= mca_btl_vader.super.btl_max_send_size ?
|
||||
frag->rdma.remaining : mca_btl_vader.super.btl_max_send_size - hdr_size;
|
||||
|
||||
/* advance the local and remote pointers */
|
||||
frag->rdma.local_address = (void *)((uintptr_t) frag->rdma.local_address + len);
|
||||
frag->rdma.remote_address += len;
|
||||
|
||||
if (MCA_BTL_VADER_OP_PUT == hdr->type) {
|
||||
/* copy the next block into the fragment buffer */
|
||||
memcpy ((void *) (hdr + 1), frag->rdma.local_address, packet_size);
|
||||
}
|
||||
|
||||
hdr->addr = frag->rdma.remote_address;
|
||||
/* clear out the complete flag before sending the fragment again */
|
||||
frag->hdr->flags &= ~MCA_BTL_VADER_FLAG_COMPLETE;
|
||||
frag->segments[0].seg_len = packet_size + sizeof (*hdr);
|
||||
frag->rdma.sent += packet_size;
|
||||
frag->rdma.remaining -= packet_size;
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
return;
|
||||
}
|
||||
|
||||
/* return the fragment before calling the callback */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
cbfunc (btl, endpoint, (void *)((uintptr_t) frag->rdma.local_address - frag->rdma.sent), NULL,
|
||||
context, cbdata, status);
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_vader_rdma_frag_start (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type,
|
||||
uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, int order,
|
||||
int flags, size_t size, void *local_address, int64_t remote_address,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
size_t hdr_size = sizeof (*hdr);
|
||||
size_t packet_size = (size + hdr_size) <= mca_btl_vader.super.btl_max_send_size ? size :
|
||||
mca_btl_vader.super.btl_max_send_size - hdr_size;
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, packet_size + hdr_size,
|
||||
MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
frag->base.des_cbfunc = (mca_btl_base_completion_fn_t) mca_btl_vader_rdma_frag_advance;
|
||||
frag->rdma.local_address = local_address;
|
||||
frag->rdma.remote_address = remote_address;
|
||||
frag->rdma.cbfunc = cbfunc;
|
||||
frag->rdma.context = cbcontext;
|
||||
frag->rdma.cbdata = cbdata;
|
||||
frag->rdma.remaining = size;
|
||||
frag->rdma.sent = 0;
|
||||
|
||||
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
|
||||
@ -183,7 +246,8 @@ mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint
|
||||
hdr->operand[0] = operand1;
|
||||
hdr->operand[1] = operand2;
|
||||
|
||||
return frag;
|
||||
mca_btl_vader_rdma_frag_advance (btl, endpoint, frag, OPAL_SUCCESS);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
#endif /* MCA_BTL_VADER_SEND_FRAG_H */
|
||||
|
@ -4,6 +4,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -156,49 +157,15 @@ int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
|
||||
}
|
||||
#endif
|
||||
|
||||
static void mca_btl_vader_sc_emu_get_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_base_descriptor_t *desc, int status)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
void *local_address = frag->rdma.local_address;
|
||||
size_t len = frag->segments[0].seg_len - sizeof (*hdr);
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
void *data;
|
||||
|
||||
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
data = (void *) (hdr + 1);
|
||||
|
||||
memcpy (local_address, data, len);
|
||||
|
||||
/* return the fragment before calling the callback */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
|
||||
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
|
||||
}
|
||||
|
||||
int mca_btl_vader_get_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
if (size > mca_btl_vader.super.btl_get_limit) {
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata,
|
||||
mca_btl_vader_sc_emu_get_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -135,21 +136,6 @@ int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
|
||||
}
|
||||
#endif
|
||||
|
||||
static void mca_btl_vader_sc_emu_put_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_base_descriptor_t *desc, int status)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
|
||||
void *local_address = frag->rdma.local_address;
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
|
||||
/* return the fragment first since the callback may call put/get/amo and could use this fragment */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
|
||||
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provides an emulated put path which uses copy-in copy-out with shared memory buffers
|
||||
*/
|
||||
@ -158,26 +144,10 @@ int mca_btl_vader_put_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_
|
||||
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
if (size > mca_btl_vader.super.btl_put_limit) {
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata,
|
||||
mca_btl_vader_sc_emu_put_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
|
||||
memcpy ((void *) (hdr + 1), local_address, size);
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user