
Merge pull request #6961 from hjelmn/fix_btl_vader_fragment_issue

btl/vader: when using single-copy emulation, fragment large rdma
This commit is contained in:
bosilca 2019-09-06 22:24:12 -04:00 committed by GitHub
parents 884d4e78cc ae91b11de2
commit d7f6dd0f30
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 96 additions and 163 deletions
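
In short: the emulated single-copy path previously capped btl_put_limit/btl_get_limit at one send fragment and returned OPAL_ERR_NOT_AVAILABLE for anything larger; with this change the operation is carried by a single fragment that is re-posted until the whole transfer completes. Below is a minimal sketch of the chunking rule used by the new advance loop. The 32 KiB max send size, the 32-byte header size, and the next_packet_size helper are illustrative assumptions, not the component's actual defaults; the real logic lives in mca_btl_vader_rdma_frag_start()/mca_btl_vader_rdma_frag_advance() in the diff below.

    #include <stddef.h>
    #include <stdio.h>

    /* Illustrative stand-ins for mca_btl_vader.super.btl_max_send_size and
     * sizeof (mca_btl_vader_sc_emu_hdr_t); the real values come from the BTL. */
    #define EXAMPLE_MAX_SEND_SIZE ((size_t) 32 * 1024)
    #define EXAMPLE_HDR_SIZE      ((size_t) 32)

    /* Payload bytes the next emulated-RDMA fragment can carry: the whole
     * remainder if it fits behind the header, otherwise whatever a
     * maximum-size send fragment leaves once the header is accounted for. */
    static size_t next_packet_size (size_t remaining)
    {
        if (remaining + EXAMPLE_HDR_SIZE <= EXAMPLE_MAX_SEND_SIZE) {
            return remaining;
        }
        return EXAMPLE_MAX_SEND_SIZE - EXAMPLE_HDR_SIZE;
    }

    int main (void)
    {
        size_t remaining = 100 * 1024;   /* e.g. a 100 KiB emulated put */
        int fragments = 0;

        while (remaining > 0) {
            remaining -= next_packet_size (remaining);
            ++fragments;
        }
        printf ("sent in %d fragments\n", fragments);   /* 4 with the values above */
        return 0;
    }

Between chunks the fragment's local and remote addresses are advanced by the amount just sent, and the user completion callback fires only once, after the final chunk.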

View file

@@ -2,6 +2,7 @@
/*
* Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2019 Google, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -16,58 +17,14 @@
#include "btl_vader_endpoint.h"
#include "btl_vader_xpmem.h"
static void mca_btl_vader_sc_emu_aop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
mca_btl_base_descriptor_t *desc, int status)
{
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
void *local_address = frag->rdma.local_address;
void *context = frag->rdma.context;
void *cbdata = frag->rdma.cbdata;
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
/* return the fragment first since the callback may call put/get/amo and could use this fragment */
MCA_BTL_VADER_FRAG_RETURN(frag);
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
}
int mca_btl_vader_emu_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_btl_vader_frag_t *frag;
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags, NULL,
remote_address, cbfunc, cbcontext, cbdata, mca_btl_vader_sc_emu_aop_complete);
if (OPAL_UNLIKELY(NULL == frag)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* send is always successful */
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
return OPAL_SUCCESS;
}
static void mca_btl_vader_sc_emu_afop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
mca_btl_base_descriptor_t *desc, int status)
{
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
mca_btl_vader_sc_emu_hdr_t *hdr;
void *local_address = frag->rdma.local_address;
void *context = frag->rdma.context;
void *cbdata = frag->rdma.cbdata;
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
*((int64_t *) frag->rdma.local_address) = hdr->operand[0];
/* return the fragment first since the callback may call put/get/amo and could use this fragment */
MCA_BTL_VADER_FRAG_RETURN(frag);
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags,
size, NULL, remote_address, cbfunc, cbcontext, cbdata);
}
int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
@@ -76,19 +33,9 @@ int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_ba
uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
mca_btl_vader_frag_t *frag;
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags,
local_address, remote_address, cbfunc, cbcontext, cbdata,
mca_btl_vader_sc_emu_afop_complete);
if (OPAL_UNLIKELY(NULL == frag)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* send is always successful */
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
return OPAL_SUCCESS;
size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags,
size, local_address, remote_address, cbfunc, cbcontext, cbdata);
}
int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
@@ -96,17 +43,7 @@ int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_
mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_btl_vader_frag_t *frag;
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, 0, order,
flags, local_address, remote_address, cbfunc, cbcontext, cbdata,
mca_btl_vader_sc_emu_afop_complete);
if (OPAL_UNLIKELY(NULL == frag)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* send is always successful */
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
return OPAL_SUCCESS;
size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8;
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, order,
flags, size, local_address, remote_address, cbfunc, cbcontext, cbdata);
}

View file

@@ -21,6 +21,7 @@
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2019 Google, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -478,12 +479,6 @@ static void mca_btl_vader_check_single_copy (void)
mca_btl_vader.super.btl_get = NULL;
mca_btl_vader.super.btl_put = NULL;
}
if (MCA_BTL_VADER_EMUL == mca_btl_vader_component.single_copy_mechanism) {
/* limit to the maximum fragment size */
mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
}
}
/*

View file

@@ -14,6 +14,7 @@
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2019 Google, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -91,9 +92,12 @@ struct mca_btl_vader_frag_t {
/** rdma callback data */
struct mca_btl_vader_rdma_cbdata_t {
void *local_address;
uint64_t remote_address;
mca_btl_base_rdma_completion_fn_t cbfunc;
void *context;
void *cbdata;
size_t remaining;
size_t sent;
} rdma;
};
@@ -151,28 +155,87 @@ static inline void mca_btl_vader_frag_complete (mca_btl_vader_frag_t *frag) {
int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx);
static inline mca_btl_vader_frag_t *
mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type,
uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, int order,
int flags, size_t size, void *local_address, int64_t remote_address,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext,
void *cbdata, mca_btl_base_completion_fn_t des_cbfunc)
static inline void mca_btl_vader_rdma_frag_advance (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
mca_btl_vader_frag_t *frag, int status)
{
mca_btl_vader_sc_emu_hdr_t *hdr;
size_t total_size = size + sizeof (*hdr);
mca_btl_vader_frag_t *frag;
mca_btl_vader_sc_emu_hdr_t *hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
size_t hdr_size = sizeof (*hdr);
size_t len = frag->rdma.sent ? frag->segments[0].seg_len - hdr_size : 0;
void *context = frag->rdma.context;
void *cbdata = frag->rdma.cbdata;
void *data = (void *) (hdr + 1);
frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, total_size,
MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
if (frag->rdma.sent) {
if (MCA_BTL_VADER_OP_GET == hdr->type) {
memcpy (frag->rdma.local_address, data, len);
} else if ((MCA_BTL_VADER_OP_ATOMIC == hdr->type || MCA_BTL_VADER_OP_CSWAP == hdr->type) &&
frag->rdma.local_address) {
if (8 == len) {
*((int64_t *) frag->rdma.local_address) = hdr->operand[0];
} else {
*((int32_t *) frag->rdma.local_address) = (int32_t) hdr->operand[0];
}
}
}
frag->base.des_cbfunc = des_cbfunc;
if (frag->rdma.remaining) {
size_t packet_size = (frag->rdma.remaining + hdr_size) <= mca_btl_vader.super.btl_max_send_size ?
frag->rdma.remaining : mca_btl_vader.super.btl_max_send_size - hdr_size;
/* advance the local and remote pointers */
frag->rdma.local_address = (void *)((uintptr_t) frag->rdma.local_address + len);
frag->rdma.remote_address += len;
if (MCA_BTL_VADER_OP_PUT == hdr->type) {
/* copy the next block into the fragment buffer */
memcpy ((void *) (hdr + 1), frag->rdma.local_address, packet_size);
}
hdr->addr = frag->rdma.remote_address;
/* clear out the complete flag before sending the fragment again */
frag->hdr->flags &= ~MCA_BTL_VADER_FLAG_COMPLETE;
frag->segments[0].seg_len = packet_size + sizeof (*hdr);
frag->rdma.sent += packet_size;
frag->rdma.remaining -= packet_size;
/* send is always successful */
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
return;
}
/* return the fragment before calling the callback */
MCA_BTL_VADER_FRAG_RETURN(frag);
cbfunc (btl, endpoint, (void *)((uintptr_t) frag->rdma.local_address - frag->rdma.sent), NULL,
context, cbdata, status);
}
static inline int
mca_btl_vader_rdma_frag_start (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type,
uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, int order,
int flags, size_t size, void *local_address, int64_t remote_address,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_btl_vader_sc_emu_hdr_t *hdr;
size_t hdr_size = sizeof (*hdr);
size_t packet_size = (size + hdr_size) <= mca_btl_vader.super.btl_max_send_size ? size :
mca_btl_vader.super.btl_max_send_size - hdr_size;
mca_btl_vader_frag_t *frag;
frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, packet_size + hdr_size,
MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
if (OPAL_UNLIKELY(NULL == frag)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
frag->base.des_cbfunc = (mca_btl_base_completion_fn_t) mca_btl_vader_rdma_frag_advance;
frag->rdma.local_address = local_address;
frag->rdma.remote_address = remote_address;
frag->rdma.cbfunc = cbfunc;
frag->rdma.context = cbcontext;
frag->rdma.cbdata = cbdata;
frag->rdma.remaining = size;
frag->rdma.sent = 0;
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
@@ -183,7 +246,8 @@ mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint
hdr->operand[0] = operand1;
hdr->operand[1] = operand2;
return frag;
mca_btl_vader_rdma_frag_advance (btl, endpoint, frag, OPAL_SUCCESS);
return OPAL_SUCCESS;
}
#endif /* MCA_BTL_VADER_SEND_FRAG_H */

View file

@@ -4,6 +4,7 @@
* reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2019 Google, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -156,49 +157,15 @@ int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
}
#endif
static void mca_btl_vader_sc_emu_get_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
mca_btl_base_descriptor_t *desc, int status)
{
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
mca_btl_vader_sc_emu_hdr_t *hdr;
void *local_address = frag->rdma.local_address;
size_t len = frag->segments[0].seg_len - sizeof (*hdr);
void *context = frag->rdma.context;
void *cbdata = frag->rdma.cbdata;
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
void *data;
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
data = (void *) (hdr + 1);
memcpy (local_address, data, len);
/* return the fragment before calling the callback */
MCA_BTL_VADER_FRAG_RETURN(frag);
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
}
int mca_btl_vader_get_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_btl_vader_frag_t *frag;
if (size > mca_btl_vader.super.btl_get_limit) {
return OPAL_ERR_NOT_AVAILABLE;
}
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size,
local_address, remote_address, cbfunc, cbcontext, cbdata,
mca_btl_vader_sc_emu_get_complete);
if (OPAL_UNLIKELY(NULL == frag)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* send is always successful */
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
return OPAL_SUCCESS;
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size,
local_address, remote_address, cbfunc, cbcontext, cbdata);
}

View file

@@ -4,6 +4,7 @@
* reserved.
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2019 Google, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -135,21 +136,6 @@ int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
}
#endif
static void mca_btl_vader_sc_emu_put_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
mca_btl_base_descriptor_t *desc, int status)
{
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
void *local_address = frag->rdma.local_address;
void *context = frag->rdma.context;
void *cbdata = frag->rdma.cbdata;
mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc;
/* return the fragment first since the callback may call put/get/amo and could use this fragment */
MCA_BTL_VADER_FRAG_RETURN(frag);
cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status);
}
/**
* @brief Provides an emulated put path which uses copy-in copy-out with shared memory buffers
*/
@@ -158,26 +144,10 @@ int mca_btl_vader_put_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_btl_vader_sc_emu_hdr_t *hdr;
mca_btl_vader_frag_t *frag;
if (size > mca_btl_vader.super.btl_put_limit) {
return OPAL_ERR_NOT_AVAILABLE;
}
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size,
local_address, remote_address, cbfunc, cbcontext, cbdata,
mca_btl_vader_sc_emu_put_complete);
if (OPAL_UNLIKELY(NULL == frag)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
memcpy ((void *) (hdr + 1), local_address, size);
/* send is always successful */
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
return OPAL_SUCCESS;
return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size,
local_address, remote_address, cbfunc, cbcontext, cbdata);
}