
This commit updates the btl interface to change the parameters passed to receive callbacks. The interface used to pass the tag, a btl base descriptor, and the callback context. Most of the values in the btl base descriptor were unused and only helped simplify the callbacks from the self btl. All of the arguments have now been replaced with a single receive callback descriptor. This descriptor contains the incoming endpoint, data segment(s), tag, and callback context. All btls have been updated to use the new callback and the btl interface version has been bumped to v3.2.0. As part of this change the descriptor argument (and the segments contained within it) have been marked as const. They were treated as const before but this change could allow the compiler to make better optimization decisions and will enforce that the callback does not attempt to change the data in the descriptor. Signed-off-by: Nathan Hjelm <hjelmn@google.com>
201 строка
6.9 KiB
C
201 строка
6.9 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* $COPYRIGHT$
|
|
* Copyright (c) 2018 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* Copyright (c) 2018 Intel Inc. All rights reserved
|
|
* Copyright (c) 2020 Google, LLC. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "btl_ofi.h"
|
|
#include "btl_ofi_frag.h"
|
|
#include "btl_ofi_rdma.h"
|
|
#include "btl_ofi_endpoint.h"
|
|
|
|
/**
 * Constructor for mca_btl_ofi_base_frag_t objects.
 *
 * Zeroes sizeof(*frag) - sizeof(frag->base) bytes beginning
 * sizeof(frag->base) bytes into the fragment, then points the base
 * descriptor at the fragment's own segment array with a count of one.
 *
 * @param frag  fragment being constructed
 */
static void mca_btl_ofi_base_frag_constructor (mca_btl_ofi_base_frag_t *frag)
{
    const size_t base_size = sizeof (frag->base);
    char *after_base = (char *) frag + base_size;

    /* wipe everything past the first base_size bytes; those are left as-is */
    memset (after_base, 0, sizeof (*frag) - base_size);

    frag->base.des_segment_count = 1;
    frag->base.des_segments = frag->segments;
}
|
|
|
|
/**
 * Destructor for mca_btl_ofi_base_frag_t objects. Performs no work.
 *
 * @param frag  fragment being destructed (unused)
 */
static void mca_btl_ofi_base_frag_destructor (mca_btl_ofi_base_frag_t *frag)
{
    /* nothing to release */
    (void) frag;
}
|
|
|
|
/* Class instance for mca_btl_ofi_base_frag_t, declared against
 * mca_btl_base_descriptor_t with the constructor and destructor above. */
OBJ_CLASS_INSTANCE(mca_btl_ofi_base_frag_t, mca_btl_base_descriptor_t,
                   mca_btl_ofi_base_frag_constructor, mca_btl_ofi_base_frag_destructor);
|
|
|
|
OBJ_CLASS_INSTANCE(mca_btl_ofi_frag_completion_t,
|
|
opal_free_list_item_t,
|
|
NULL,
|
|
NULL);
|
|
|
|
mca_btl_ofi_frag_completion_t *mca_btl_ofi_frag_completion_alloc
|
|
(mca_btl_base_module_t *btl,
|
|
mca_btl_ofi_context_t *context,
|
|
mca_btl_ofi_base_frag_t *frag,
|
|
int type)
|
|
{
|
|
mca_btl_ofi_frag_completion_t *comp;
|
|
|
|
comp = (mca_btl_ofi_frag_completion_t*) opal_free_list_get(&context->frag_comp_list);
|
|
comp->base.btl = btl;
|
|
comp->base.my_context = context;
|
|
comp->base.my_list = &context->frag_comp_list;
|
|
comp->base.type = type;
|
|
|
|
comp->frag = frag;
|
|
comp->comp_ctx.comp = comp;
|
|
|
|
return comp;
|
|
}
|
|
|
|
|
|
mca_btl_base_descriptor_t *mca_btl_ofi_alloc(
|
|
mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
uint8_t order, size_t size, uint32_t flags)
|
|
{
|
|
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t*) btl;
|
|
mca_btl_ofi_base_frag_t *frag = NULL;
|
|
mca_btl_ofi_context_t *context = get_ofi_context(ofi_btl);
|
|
|
|
frag = mca_btl_ofi_frag_alloc(ofi_btl, &context->frag_list, endpoint);
|
|
|
|
if (OPAL_LIKELY(frag)) {
|
|
frag->segments[0].seg_addr.pval = frag + 1;
|
|
frag->segments[0].seg_len = size;
|
|
|
|
frag->base.des_segment_count = 1;
|
|
frag->base.des_segments = &frag->segments[0];
|
|
frag->base.des_flags = flags;
|
|
frag->base.order = order;
|
|
frag->hdr.len = size;
|
|
}
|
|
|
|
return (mca_btl_base_descriptor_t*) frag;
|
|
}
|
|
|
|
/**
 * Release a descriptor by handing the underlying fragment to
 * mca_btl_ofi_frag_return().
 *
 * @param btl  BTL module (not used by this function)
 * @param des  descriptor to release; treated as an OFI base fragment
 *
 * @return OPAL_SUCCESS always
 */
int mca_btl_ofi_free (mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des)
{
    mca_btl_ofi_base_frag_t *ofi_frag = (mca_btl_ofi_base_frag_t *) des;

    /* give the fragment back */
    mca_btl_ofi_frag_return (ofi_frag);

    return OPAL_SUCCESS;
}
|
|
|
|
int mca_btl_ofi_send (mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
mca_btl_base_descriptor_t *descriptor,
|
|
mca_btl_base_tag_t tag)
|
|
{
|
|
int rc = 0;
|
|
mca_btl_ofi_context_t *context;
|
|
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t*) btl;
|
|
mca_btl_ofi_endpoint_t *ofi_ep = (mca_btl_ofi_endpoint_t*) endpoint;
|
|
mca_btl_ofi_base_frag_t *frag = (mca_btl_ofi_base_frag_t*) descriptor;
|
|
mca_btl_ofi_frag_completion_t *comp;
|
|
|
|
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
|
|
|
/* This tag is the active message tag for the remote side */
|
|
frag->hdr.tag = tag;
|
|
|
|
/* create completion context */
|
|
context = get_ofi_context(ofi_btl);
|
|
comp = mca_btl_ofi_frag_completion_alloc(btl, context, frag,
|
|
MCA_BTL_OFI_TYPE_SEND);
|
|
|
|
/* send the frag. Note that we start sending from BTL header + payload
|
|
* because we need the other side to have this header information. */
|
|
rc = fi_send(context->tx_ctx,
|
|
&frag->hdr,
|
|
sizeof(mca_btl_ofi_header_t) + frag->hdr.len,
|
|
NULL,
|
|
ofi_ep->peer_addr,
|
|
&comp->comp_ctx);
|
|
|
|
if (OPAL_UNLIKELY(FI_SUCCESS != rc)) {
|
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
MCA_BTL_OFI_NUM_SEND_INC(ofi_btl);
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
int mca_btl_ofi_recv_frag (mca_btl_ofi_module_t *ofi_btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
mca_btl_ofi_context_t *context,
|
|
mca_btl_ofi_base_frag_t *frag)
|
|
{
|
|
int rc;
|
|
mca_btl_active_message_callback_t *reg = mca_btl_base_active_message_trigger + frag->hdr.tag;
|
|
mca_btl_base_segment_t segment = {.seg_addr.pval = (void *)(frag + 1),
|
|
.seg_len = frag->hdr.len};
|
|
/* Tell PML where the payload is */
|
|
mca_btl_base_receive_descriptor_t recv_desc = {.endpoint = endpoint,
|
|
.des_segments = &segment,
|
|
.des_segment_count = 1,
|
|
.tag = frag->hdr.tag,
|
|
.cbdata = reg->cbdata};
|
|
|
|
/* call the callback */
|
|
reg->cbfunc (&ofi_btl->super, &recv_desc);
|
|
mca_btl_ofi_frag_complete(frag, OPAL_SUCCESS);
|
|
|
|
/* repost the recv */
|
|
rc = mca_btl_ofi_post_recvs((mca_btl_base_module_t*) ofi_btl, context, 1);
|
|
if (OPAL_SUCCESS != rc) {
|
|
/* might not be that bad but let's just fail here. */
|
|
BTL_ERROR(("failed reposting receive."));
|
|
MCA_BTL_OFI_ABORT();
|
|
}
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
struct mca_btl_base_descriptor_t *mca_btl_ofi_prepare_src (
|
|
mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
opal_convertor_t *convertor,
|
|
uint8_t order, size_t reserve,
|
|
size_t *size, uint32_t flags)
|
|
{
|
|
struct iovec iov;
|
|
size_t length;
|
|
uint32_t iov_count = 1;
|
|
mca_btl_ofi_base_frag_t *frag;
|
|
|
|
/* allocate the frag with reserve. */
|
|
frag = (mca_btl_ofi_base_frag_t*) mca_btl_ofi_alloc(btl, endpoint,
|
|
order, reserve, flags);
|
|
if (OPAL_UNLIKELY(NULL == frag)) {
|
|
return NULL;
|
|
}
|
|
|
|
/* pack the data after the reserve */
|
|
iov.iov_len = *size;
|
|
iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)(frag->segments[0].seg_addr.pval)) + reserve);
|
|
opal_convertor_pack(convertor, &iov, &iov_count, &length);
|
|
|
|
/* pass on frag information */
|
|
frag->base.des_segments = frag->segments;
|
|
frag->base.des_flags = flags;
|
|
frag->base.order = MCA_BTL_NO_ORDER;
|
|
frag->segments[0].seg_len += length;
|
|
frag->hdr.len += length;
|
|
*size = length;
|
|
|
|
return &frag->base;
|
|
}
|