Merge pull request #6961 from hjelmn/fix_btl_vader_fragment_issue
btl/vader: when using single-copy emulation fragment large rdma
Этот коммит содержится в:
Коммит
d7f6dd0f30
@ -2,6 +2,7 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -16,58 +17,14 @@
|
||||
#include "btl_vader_endpoint.h"
|
||||
#include "btl_vader_xpmem.h"
|
||||
|
||||
/*
 * Descriptor completion callback used by the emulated atomic-operation path.
 *
 * The descriptor is treated as a mca_btl_vader_frag_t. The rdma callback
 * information stored in the fragment (local_address, context, cbdata and
 * cbfunc) is copied into locals, the fragment is handed back with
 * MCA_BTL_VADER_FRAG_RETURN, and only then is the stored rdma completion
 * callback invoked. The copies are what make it safe to release the
 * fragment before the callback runs.
 *
 * btl, endpoint and status are forwarded to cbfunc unchanged; NULL is passed
 * as cbfunc's fourth argument.
 */
static void mca_btl_vader_sc_emu_aop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_base_descriptor_t *desc, int status)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
|
||||
void *local_address = frag->rdma.local_address;
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
|
||||
/* return the fragment first since the callback may call put/get/amo and could use this fragment */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
|
||||
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
|
||||
}
|
||||
|
||||
/*
 * Start an emulated atomic operation on remote_address.
 *
 * Allocates a fragment with mca_btl_vader_rdma_frag_alloc using
 * MCA_BTL_VADER_OP_ATOMIC, the given operand and op, and
 * mca_btl_vader_sc_emu_aop_complete as the descriptor completion callback,
 * then sends it with tag MCA_BTL_TAG_VADER. NULL is passed in the argument
 * slot where mca_btl_vader_emu_afop passes local_address.
 *
 * Returns OPAL_ERR_OUT_OF_RESOURCE when the allocation returns NULL and
 * OPAL_SUCCESS otherwise; the return value of mca_btl_vader_send is
 * deliberately ignored. remote_handle is not referenced in this body.
 *
 * NOTE(review): the call passes `op, 0, order, flags, NULL` while the
 * mca_btl_vader_rdma_frag_alloc prototype shown later in this diff lists
 * `op, order, flags, size, local_address` -- as written, order/flags appear
 * to land in the flags/size positions. Confirm the intended mapping.
 */
int mca_btl_vader_emu_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
|
||||
mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags, NULL,
|
||||
remote_address, cbfunc, cbcontext, cbdata, mca_btl_vader_sc_emu_aop_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static void mca_btl_vader_sc_emu_afop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_base_descriptor_t *desc, int status)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
void *local_address = frag->rdma.local_address;
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
|
||||
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
|
||||
*((int64_t *) frag->rdma.local_address) = hdr->operand[0];
|
||||
|
||||
/* return the fragment first since the callback may call put/get/amo and could use this fragment */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
|
||||
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
|
||||
size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags,
|
||||
size, NULL, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
||||
int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
@ -76,19 +33,9 @@ int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_ba
|
||||
uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata,
|
||||
mca_btl_vader_sc_emu_afop_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags,
|
||||
size, local_address, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
||||
int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
@ -96,17 +43,7 @@ int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_
|
||||
mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, 0, order,
|
||||
flags, local_address, remote_address, cbfunc, cbcontext, cbdata,
|
||||
mca_btl_vader_sc_emu_afop_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, order,
|
||||
flags, size, local_address, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
@ -21,6 +21,7 @@
|
||||
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
|
||||
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -478,12 +479,6 @@ static void mca_btl_vader_check_single_copy (void)
|
||||
mca_btl_vader.super.btl_get = NULL;
|
||||
mca_btl_vader.super.btl_put = NULL;
|
||||
}
|
||||
|
||||
if (MCA_BTL_VADER_EMUL == mca_btl_vader_component.single_copy_mechanism) {
|
||||
/* limit to the maximum fragment size */
|
||||
mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
|
||||
mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -91,9 +92,12 @@ struct mca_btl_vader_frag_t {
|
||||
/** rdma callback data */
|
||||
struct mca_btl_vader_rdma_cbdata_t {
|
||||
void *local_address;
|
||||
uint64_t remote_address;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc;
|
||||
void *context;
|
||||
void *cbdata;
|
||||
size_t remaining;
|
||||
size_t sent;
|
||||
} rdma;
|
||||
};
|
||||
|
||||
@ -151,28 +155,87 @@ static inline void mca_btl_vader_frag_complete (mca_btl_vader_frag_t *frag) {
|
||||
|
||||
int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx);
|
||||
|
||||
static inline mca_btl_vader_frag_t *
|
||||
mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type,
|
||||
uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, int order,
|
||||
int flags, size_t size, void *local_address, int64_t remote_address,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext,
|
||||
void *cbdata, mca_btl_base_completion_fn_t des_cbfunc)
|
||||
static inline void mca_btl_vader_rdma_frag_advance (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_vader_frag_t *frag, int status)
|
||||
{
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
size_t total_size = size + sizeof (*hdr);
|
||||
mca_btl_vader_frag_t *frag;
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
size_t hdr_size = sizeof (*hdr);
|
||||
size_t len = frag->rdma.sent ? frag->segments[0].seg_len - hdr_size : 0;
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
void *data = (void *) (hdr + 1);
|
||||
|
||||
frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, total_size,
|
||||
MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
if (frag->rdma.sent) {
|
||||
if (MCA_BTL_VADER_OP_GET == hdr->type) {
|
||||
memcpy (frag->rdma.local_address, data, len);
|
||||
} else if ((MCA_BTL_VADER_OP_ATOMIC == hdr->type || MCA_BTL_VADER_OP_CSWAP == hdr->type) &&
|
||||
frag->rdma.local_address) {
|
||||
if (8 == len) {
|
||||
*((int64_t *) frag->rdma.local_address) = hdr->operand[0];
|
||||
} else {
|
||||
*((int32_t *) frag->rdma.local_address) = (int32_t) hdr->operand[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
frag->base.des_cbfunc = des_cbfunc;
|
||||
if (frag->rdma.remaining) {
|
||||
size_t packet_size = (frag->rdma.remaining + hdr_size) <= mca_btl_vader.super.btl_max_send_size ?
|
||||
frag->rdma.remaining : mca_btl_vader.super.btl_max_send_size - hdr_size;
|
||||
|
||||
/* advance the local and remote pointers */
|
||||
frag->rdma.local_address = (void *)((uintptr_t) frag->rdma.local_address + len);
|
||||
frag->rdma.remote_address += len;
|
||||
|
||||
if (MCA_BTL_VADER_OP_PUT == hdr->type) {
|
||||
/* copy the next block into the fragment buffer */
|
||||
memcpy ((void *) (hdr + 1), frag->rdma.local_address, packet_size);
|
||||
}
|
||||
|
||||
hdr->addr = frag->rdma.remote_address;
|
||||
/* clear out the complete flag before sending the fragment again */
|
||||
frag->hdr->flags &= ~MCA_BTL_VADER_FLAG_COMPLETE;
|
||||
frag->segments[0].seg_len = packet_size + sizeof (*hdr);
|
||||
frag->rdma.sent += packet_size;
|
||||
frag->rdma.remaining -= packet_size;
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
return;
|
||||
}
|
||||
|
||||
/* return the fragment before calling the callback */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
cbfunc (btl, endpoint, (void *)((uintptr_t) frag->rdma.local_address - frag->rdma.sent), NULL,
|
||||
context, cbdata, status);
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_vader_rdma_frag_start (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type,
|
||||
uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, int order,
|
||||
int flags, size_t size, void *local_address, int64_t remote_address,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
size_t hdr_size = sizeof (*hdr);
|
||||
size_t packet_size = (size + hdr_size) <= mca_btl_vader.super.btl_max_send_size ? size :
|
||||
mca_btl_vader.super.btl_max_send_size - hdr_size;
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, packet_size + hdr_size,
|
||||
MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
frag->base.des_cbfunc = (mca_btl_base_completion_fn_t) mca_btl_vader_rdma_frag_advance;
|
||||
frag->rdma.local_address = local_address;
|
||||
frag->rdma.remote_address = remote_address;
|
||||
frag->rdma.cbfunc = cbfunc;
|
||||
frag->rdma.context = cbcontext;
|
||||
frag->rdma.cbdata = cbdata;
|
||||
frag->rdma.remaining = size;
|
||||
frag->rdma.sent = 0;
|
||||
|
||||
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
|
||||
@ -183,7 +246,8 @@ mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint
|
||||
hdr->operand[0] = operand1;
|
||||
hdr->operand[1] = operand2;
|
||||
|
||||
return frag;
|
||||
mca_btl_vader_rdma_frag_advance (btl, endpoint, frag, OPAL_SUCCESS);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
#endif /* MCA_BTL_VADER_SEND_FRAG_H */
|
||||
|
@ -4,6 +4,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -156,49 +157,15 @@ int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Descriptor completion callback used by the emulated get path.
 *
 * The first segment of the fragment starts with a
 * mca_btl_vader_sc_emu_hdr_t; the bytes immediately after that header are
 * treated as the payload. The payload length is the segment length minus
 * the header size, and that many bytes are copied into the local address
 * recorded in the fragment's rdma callback data.
 *
 * After the copy the fragment is returned with MCA_BTL_VADER_FRAG_RETURN
 * and the stored rdma completion callback is invoked using values that were
 * read out of the fragment beforehand. btl, endpoint and status are
 * forwarded unchanged; NULL is passed as cbfunc's fourth argument.
 */
static void mca_btl_vader_sc_emu_get_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_base_descriptor_t *desc, int status)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
void *local_address = frag->rdma.local_address;
|
||||
size_t len = frag->segments[0].seg_len - sizeof (*hdr);
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
void *data;
|
||||
|
||||
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
data = (void *) (hdr + 1);
|
||||
|
||||
memcpy (local_address, data, len);
|
||||
|
||||
/* return the fragment before calling the callback */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
|
||||
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
|
||||
}
|
||||
|
||||
int mca_btl_vader_get_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
if (size > mca_btl_vader.super.btl_get_limit) {
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata,
|
||||
mca_btl_vader_sc_emu_get_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -135,21 +136,6 @@ int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Descriptor completion callback used by the emulated put path.
 *
 * Reads the rdma callback information (local_address, context, cbdata and
 * cbfunc) out of the fragment into locals, returns the fragment with
 * MCA_BTL_VADER_FRAG_RETURN, and then invokes the stored rdma completion
 * callback using those saved values. No data is copied here.
 *
 * btl, endpoint and status are forwarded to cbfunc unchanged; NULL is passed
 * as cbfunc's fourth argument.
 */
static void mca_btl_vader_sc_emu_put_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_base_descriptor_t *desc, int status)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
|
||||
void *local_address = frag->rdma.local_address;
|
||||
void *context = frag->rdma.context;
|
||||
void *cbdata = frag->rdma.cbdata;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
|
||||
|
||||
/* return the fragment first since the callback may call put/get/amo and could use this fragment */
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
|
||||
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provides an emulated put path which uses copy-in copy-out with shared memory buffers
|
||||
*/
|
||||
@ -158,26 +144,10 @@ int mca_btl_vader_put_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_
|
||||
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_sc_emu_hdr_t *hdr;
|
||||
mca_btl_vader_frag_t *frag;
|
||||
|
||||
if (size > mca_btl_vader.super.btl_put_limit) {
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata,
|
||||
mca_btl_vader_sc_emu_put_complete);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
|
||||
|
||||
memcpy ((void *) (hdr + 1), local_address, size);
|
||||
|
||||
/* send is always successful */
|
||||
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size,
|
||||
local_address, remote_address, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user