1
1

Timeout! Per RFC update the BTL interface to hide segment keys. All BTLs (with the exception of wv), all relevant PMLs, and osc/rdma have been updated for the new interface.

This commit was SVN r26626.
This commit is contained in:
Nathan Hjelm 2012-06-21 17:09:12 +00:00
parent 9af72072a3
commit 249066e06d
76 changed files: 898 additions and 863 deletions

Просмотреть файл

@ -230,32 +230,26 @@ typedef void (*mca_btl_base_completion_fn_t)(
/**
* Describes a region/segment of memory that is addressable
* by an BTL.
*
* Note: In many cases the alloc and prepare methods of BTLs
* do not return a mca_btl_base_segment_t but instead return a
* subclass. Extreme care should be used when modifying
* BTL segments to prevent overwriting internal BTL data.
*
* All BTLs MUST use base segments when calling registered
* Callbacks.
*
* BTL MUST use mca_btl_base_segment_t or a subclass and
* MUST store their segment length in btl_seg_size. BTLs
* MUST specify a segment no larger than MCA_BTL_SEG_MAX_SIZE.
*
*/
struct mca_btl_base_segment_t {
/** Address of the memory */
ompi_ptr_t seg_addr;
/** Length in bytes */
uint32_t seg_len;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/** Heterogeneous padding */
uint8_t seg_padding[4];
#endif
/** Memory segment key required by some RDMA networks */
union {
uint32_t key32[4];
uint64_t key64[2];
uint8_t key8[16];
#if OMPI_CUDA_SUPPORT
uint8_t cudakey[128]; /* 64 bytes for CUDA mem handle, 64 bytes for CUDA event handle */
#endif /* OMPI_CUDA_SUPPORT */
} seg_key;
#if OMPI_CUDA_SUPPORT
/** Address of the entire memory handle */
ompi_ptr_t memh_seg_addr;
/** Length in bytes of entire memory handle */
uint32_t memh_seg_len;
#endif /* OMPI_CUDA_SUPPORT */
uint64_t seg_len;
};
typedef struct mca_btl_base_segment_t mca_btl_base_segment_t;
@ -314,6 +308,12 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_base_descriptor_t);
*/
#define MCA_BTL_DES_MAX_SEGMENTS 16
/**
* Maximum size of a BTL segment (NTH: does it really save us anything
* to hardcode this?)
*/
#define MCA_BTL_SEG_MAX_SIZE 256
/*
* BTL base header, stores the tag at a minimum
*/
@ -380,6 +380,8 @@ typedef int (*mca_btl_base_component_progress_fn_t)(void);
* completion function, this implies that all data payload in the
* mca_btl_base_descriptor_t must be copied out within this callback or
* forfeited back to the BTL.
* Note also that descriptor segments (des_dst, des_src) must be base
* segments for all callbacks.
*
* @param[IN] btl BTL module
* @param[IN] tag The active message receive callback tag value
@ -796,6 +798,7 @@ struct mca_btl_base_module_t {
uint32_t btl_latency; /**< relative ranking of latency used to prioritize btls */
uint32_t btl_bandwidth; /**< bandwidth (Mbytes/sec) supported by each endpoint */
uint32_t btl_flags; /**< flags (put/get...) */
size_t btl_seg_size; /**< size of a btl segment */
/* BTL function table */
mca_btl_base_module_add_procs_fn_t btl_add_procs;

Просмотреть файл

@ -641,6 +641,7 @@ mca_btl_elan_module_t mca_btl_elan_module = {
0, /* latency */
0, /* bandwidth */
0, /* flags */
0, /* segment size */
mca_btl_elan_add_procs,
mca_btl_elan_del_procs,
NULL, /* btl_register */

Просмотреть файл

@ -92,6 +92,7 @@ int mca_btl_elan_component_open(void)
mca_btl_elan_module.super.btl_rdma_pipeline_frag_size = 128 * 1024;
mca_btl_elan_module.super.btl_min_rdma_pipeline_size = 128 * 1024;
mca_btl_elan_module.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND;
mca_btl_elan_module.super.btl_seg_size = sizeof (mca_btl_base_segment_t);
mca_btl_elan_module.super.btl_bandwidth = 1959;
mca_btl_elan_module.super.btl_latency = 4;
mca_btl_base_param_register(&mca_btl_elan_component.super.btl_version,

Просмотреть файл

@ -181,10 +181,10 @@ mca_btl_base_descriptor_t* mca_btl_mx_alloc( struct mca_btl_base_module_t* btl,
if( OPAL_UNLIKELY(NULL == frag) ) {
return NULL;
}
frag->segment[0].seg_len =
frag->segment[0].base.seg_len =
size <= mx_btl->super.btl_eager_limit ?
size : mx_btl->super.btl_eager_limit ;
frag->segment[0].seg_addr.pval = (void*)(frag+1);
frag->segment[0].base.seg_addr.pval = (void*)(frag+1);
frag->base.des_src = frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_flags = flags;
@ -264,7 +264,7 @@ mca_btl_mx_prepare_src( struct mca_btl_base_module_t* btl,
return NULL;
}
frag->base.des_src_cnt = 1;
iov.iov_base = (void*)((unsigned char*)frag->segment[0].seg_addr.pval + reserve);
iov.iov_base = (void*)((unsigned char*)frag->segment[0].base.seg_addr.pval + reserve);
}
iov.iov_len = max_data;
@ -272,13 +272,13 @@ mca_btl_mx_prepare_src( struct mca_btl_base_module_t* btl,
*size = max_data;
if( 1 == frag->base.des_src_cnt ) {
frag->segment[0].seg_len = reserve + max_data;
frag->segment[0].base.seg_len = reserve + max_data;
if( 0 == reserve )
frag->segment[0].seg_addr.pval = iov.iov_base;
frag->segment[0].base.seg_addr.pval = iov.iov_base;
} else {
frag->segment[0].seg_len = reserve;
frag->segment[1].seg_len = max_data;
frag->segment[1].seg_addr.pval = iov.iov_base;
frag->segment[0].base.seg_len = reserve;
frag->segment[1].base.seg_len = max_data;
frag->segment[1].base.seg_addr.pval = iov.iov_base;
}
frag->base.des_src = frag->segment;
frag->base.des_flags = flags;
@ -321,16 +321,16 @@ mca_btl_base_descriptor_t* mca_btl_mx_prepare_dst( struct mca_btl_base_module_t*
return NULL;
}
frag->segment[0].seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment[0].seg_addr.pval) );
frag->segment[0].seg_key.key64[0] = (uint64_t)(intptr_t)frag;
frag->segment[0].base.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment[0].base.seg_addr.pval) );
frag->segment[0].key = (uint64_t)(intptr_t)frag;
mx_segment.segment_ptr = frag->segment[0].seg_addr.pval;
mx_segment.segment_length = frag->segment[0].seg_len;
mx_segment.segment_ptr = frag->segment[0].base.seg_addr.pval;
mx_segment.segment_length = frag->segment[0].base.seg_len;
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
frag->segment[0].seg_key.key64[0],
BTL_MX_PUT_MASK, NULL, &(frag->mx_request) );
frag->segment[0].key, BTL_MX_PUT_MASK,
NULL, &(frag->mx_request) );
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
opal_output( 0, "Fail to re-register a fragment with the MX NIC ...\n" );
MCA_BTL_MX_FRAG_RETURN( btl, frag );
@ -370,6 +370,8 @@ static int mca_btl_mx_put( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor )
{
mca_btl_mx_segment_t *src_seg = (mca_btl_mx_segment_t *) descriptor->des_src;
mca_btl_mx_segment_t *dst_seg = (mca_btl_mx_segment_t *) descriptor->des_dst;
mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*)btl;
mca_btl_mx_frag_t* frag = (mca_btl_mx_frag_t*)descriptor;
mx_segment_t mx_segment[2];
@ -390,13 +392,12 @@ static int mca_btl_mx_put( struct mca_btl_base_module_t* btl,
descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
do {
mx_segment[i].segment_ptr = descriptor->des_src[i].seg_addr.pval;
mx_segment[i].segment_length = descriptor->des_src[i].seg_len;
mx_segment[i].segment_ptr = src_seg[i].base.seg_addr.pval;
mx_segment[i].segment_length = src_seg[i].base.seg_len;
} while (++i < descriptor->des_src_cnt);
mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt,
endpoint->mx_peer_addr,
descriptor->des_dst[0].seg_key.key64[0], frag,
endpoint->mx_peer_addr, dst_seg->key, frag,
&frag->mx_request );
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_return) );
@ -512,6 +513,8 @@ int mca_btl_mx_send( struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag )
{
mca_btl_mx_segment_t *src_seg = (mca_btl_mx_segment_t *) descriptor->des_src;
mca_btl_mx_segment_t *dst_seg = (mca_btl_mx_segment_t *) descriptor->des_dst;
mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*)btl;
mca_btl_mx_frag_t* frag = (mca_btl_mx_frag_t*)descriptor;
mx_segment_t mx_segment[2];
@ -533,15 +536,14 @@ int mca_btl_mx_send( struct mca_btl_base_module_t* btl,
frag->type = MCA_BTL_MX_SEND;
do {
mx_segment[i].segment_ptr = descriptor->des_src[i].seg_addr.pval;
mx_segment[i].segment_length = descriptor->des_src[i].seg_len;
total_length += descriptor->des_src[i].seg_len;
mx_segment[i].segment_ptr = src_seg[i].base.seg_addr.pval;
mx_segment[i].segment_length = src_seg[i].base.seg_len;
total_length += src_seg[i].base.seg_len;
} while (++i < descriptor->des_src_cnt);
tag64 = 0x01ULL | (((uint64_t)tag) << 8);
mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt,
endpoint->mx_peer_addr,
tag64, frag, &frag->mx_request );
endpoint->mx_peer_addr, tag64, frag, &frag->mx_request );
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_return) );
return OMPI_ERROR;
@ -681,6 +683,7 @@ mca_btl_mx_module_t mca_btl_mx_module = {
0, /* exclusivity */
0, /* latency */
0, /* bandwidth */
0, /* segment size */
MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_PUT, /* flags */
mca_btl_mx_add_procs,
mca_btl_mx_del_procs,

Просмотреть файл

@ -156,6 +156,7 @@ static int mca_btl_mx_component_register(void)
MCA_BTL_FLAGS_PUT |
MCA_BTL_FLAGS_SEND |
MCA_BTL_FLAGS_RDMA_MATCHED);
mca_btl_mx_module.super.btl_seg_size = sizeof (mca_btl_mx_segment_t);
mca_btl_mx_module.super.btl_bandwidth = 2000;
mca_btl_mx_module.super.btl_latency = 5;
mca_btl_base_param_register(&mca_btl_mx_component.super.btl_version,

Просмотреть файл

@ -28,13 +28,18 @@
#define MCA_BTL_MX_RECV 0x02
BEGIN_C_DECLS
/**
 * MX BTL segment: extends the generic base segment with a 64-bit key.
 * The key is used as the MX match-bits value — it is posted with
 * mx_irecv() in prepare_dst and supplied to mx_isend() on the put side,
 * pairing the RDMA-emulating send with its pre-posted receive.
 */
struct mca_btl_mx_segment_t {
mca_btl_base_segment_t base;
uint64_t key;
};
/**
* MX send fragment derived type.
*/
struct mca_btl_mx_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment[2];
mca_btl_mx_segment_t segment[2];
struct mca_btl_base_endpoint_t* endpoint;
uint8_t type;
mx_request_t mx_request;

Просмотреть файл

@ -52,6 +52,7 @@ mca_btl_ud_module_t mca_btl_ofud_module = {
0, /* latency */
0, /* bandwidth */
MCA_BTL_FLAGS_SEND,
0, /* segment length */
mca_btl_ud_add_procs,
mca_btl_ud_del_procs,
NULL,

Просмотреть файл

@ -175,6 +175,8 @@ static int mca_btl_ud_component_register(void)
"Approximate maximum bandwidth of interconnect",
800, (int*)&mca_btl_ofud_module.super.btl_bandwidth);
mca_btl_ofud_module.super.btl_seg_size = sizeof (mca_btl_base_segment_t);
mca_btl_ofud_module.super.btl_eager_limit -= sizeof(mca_btl_ud_header_t);
mca_btl_ofud_module.super.btl_max_send_size -= sizeof(mca_btl_ud_header_t);

Просмотреть файл

@ -87,6 +87,7 @@ mca_btl_openib_module_t mca_btl_openib_module = {
0, /* latency */
0, /* bandwidth */
0, /* TODO this should be PUT btl flags */
0, /* segment size */
mca_btl_openib_add_procs,
mca_btl_openib_del_procs,
NULL,
@ -865,7 +866,7 @@ ib_frag_alloc(mca_btl_openib_module_t *btl, size_t size, uint8_t order,
return NULL;
/* not all upper layer users set this */
to_base_frag(item)->segment.seg_len = size;
to_base_frag(item)->segment.base.seg_len = size;
to_base_frag(item)->base.order = order;
to_base_frag(item)->base.des_flags = flags;
@ -895,7 +896,7 @@ static mca_btl_openib_send_frag_t *check_coalescing(opal_list_t *frag_list,
}
total_length = size + frag->coalesced_length +
to_base_frag(frag)->segment.seg_len +
to_base_frag(frag)->segment.base.seg_len +
sizeof(mca_btl_openib_header_coalesced_t);
qp = to_base_frag(frag)->base.order;
@ -973,8 +974,8 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
sfrag->hdr->tag = MCA_BTL_TAG_BTL;
ctrl_hdr->type = MCA_BTL_OPENIB_CONTROL_COALESCED;
clsc_hdr->tag = org_tag;
clsc_hdr->size = to_base_frag(sfrag)->segment.seg_len;
clsc_hdr->alloc_size = to_base_frag(sfrag)->segment.seg_len;
clsc_hdr->size = to_base_frag(sfrag)->segment.base.seg_len;
clsc_hdr->alloc_size = to_base_frag(sfrag)->segment.base.seg_len;
if(ep->nbo)
BTL_OPENIB_HEADER_COALESCED_HTON(*clsc_hdr);
sfrag->coalesced_length = sizeof(mca_btl_openib_control_header_t) +
@ -984,13 +985,13 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
cfrag->hdr = (mca_btl_openib_header_coalesced_t*)((unsigned char*)(sfrag->hdr + 1) +
sfrag->coalesced_length +
to_base_frag(sfrag)->segment.seg_len);
to_base_frag(sfrag)->segment.base.seg_len);
cfrag->hdr = (mca_btl_openib_header_coalesced_t*)BTL_OPENIB_ALIGN_COALESCE_HDR(cfrag->hdr);
cfrag->hdr->alloc_size = size;
/* point coalesced frag pointer into a data buffer */
to_base_frag(cfrag)->segment.seg_addr.pval = cfrag->hdr + 1;
to_base_frag(cfrag)->segment.seg_len = size;
to_base_frag(cfrag)->segment.base.seg_addr.pval = cfrag->hdr + 1;
to_base_frag(cfrag)->segment.base.seg_len = size;
/* save coalesced fragment on a main fragment; we will need it after send
* completion to free it and to call upper layer callback */
@ -1131,18 +1132,14 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
to_base_frag(frag)->base.order = order;
to_base_frag(frag)->base.des_flags = flags;
to_base_frag(frag)->segment.seg_len = max_data;
to_base_frag(frag)->segment.seg_addr.pval = iov.iov_base;
to_base_frag(frag)->segment.seg_key.key32[0] =
(uint32_t)frag->sg_entry.lkey;
to_base_frag(frag)->segment.base.seg_len = max_data;
to_base_frag(frag)->segment.base.seg_addr.pval = iov.iov_base;
to_base_frag(frag)->segment.key = frag->sg_entry.lkey;
assert(MCA_BTL_NO_ORDER == order);
BTL_VERBOSE(("frag->sg_entry.lkey = %" PRIu32 " .addr = %" PRIx64
" frag->segment.seg_key.key32[0] = %" PRIu32,
frag->sg_entry.lkey,
frag->sg_entry.addr,
frag->sg_entry.lkey));
BTL_VERBOSE(("frag->sg_entry.lkey = %" PRIu32 " .addr = %" PRIx64,
frag->sg_entry.lkey, frag->sg_entry.addr));
return &to_base_frag(frag)->base;
}
@ -1163,14 +1160,14 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
return NULL;
iov.iov_len = max_data;
iov.iov_base = (IOVBASE_TYPE *) ( (unsigned char*)to_base_frag(frag)->segment.seg_addr.pval +
iov.iov_base = (IOVBASE_TYPE *) ( (unsigned char*)to_base_frag(frag)->segment.base.seg_addr.pval +
reserve );
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
*size = max_data;
/* not all upper layer users set this */
to_base_frag(frag)->segment.seg_len = max_data + reserve;
to_base_frag(frag)->segment.base.seg_len = max_data + reserve;
return &to_base_frag(frag)->base;
}
@ -1253,16 +1250,14 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (uint64_t)(uintptr_t)buffer;
to_base_frag(frag)->segment.seg_addr.pval = buffer;
to_base_frag(frag)->segment.seg_len = *size;
to_base_frag(frag)->segment.seg_key.key32[0] = openib_reg->mr->rkey;
to_base_frag(frag)->segment.base.seg_addr.pval = buffer;
to_base_frag(frag)->segment.base.seg_len = *size;
to_base_frag(frag)->segment.key = openib_reg->mr->rkey;
to_base_frag(frag)->base.order = order;
to_base_frag(frag)->base.des_flags = flags;
BTL_VERBOSE(("frag->sg_entry.lkey = %" PRIu32 " .addr = %" PRIx64 " "
"frag->segment.seg_key.key32[0] = %" PRIu32,
frag->sg_entry.lkey,
frag->sg_entry.addr,
"rkey = %" PRIu32, frag->sg_entry.lkey, frag->sg_entry.addr,
openib_reg->mr->rkey));
return &to_base_frag(frag)->base;
@ -1479,14 +1474,14 @@ int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl,
}
frag = to_base_frag(item);
hdr = to_send_frag(item)->hdr;
frag->segment.seg_len = size;
frag->segment.base.seg_len = size;
frag->base.order = qp;
frag->base.des_flags = flags;
hdr->tag = tag;
to_com_frag(item)->endpoint = ep;
/* put match header */
memcpy(frag->segment.seg_addr.pval, header, header_size);
memcpy(frag->segment.base.seg_addr.pval, header, header_size);
/* Pack data */
if(payload_size) {
@ -1494,7 +1489,7 @@ int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl,
struct iovec iov;
uint32_t iov_count;
/* pack the data into the supplied buffer */
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)frag->segment.seg_addr.pval + header_size);
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)frag->segment.base.seg_addr.pval + header_size);
iov.iov_len = max_data = payload_size;
iov_count = 1;
@ -1608,11 +1603,13 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
mca_btl_base_endpoint_t* ep,
mca_btl_base_descriptor_t* descriptor)
{
mca_btl_openib_segment_t *src_seg = (mca_btl_openib_segment_t *) descriptor->des_src;
mca_btl_openib_segment_t *dst_seg = (mca_btl_openib_segment_t *) descriptor->des_dst;
struct ibv_send_wr* bad_wr;
mca_btl_openib_out_frag_t* frag = to_out_frag(descriptor);
int qp = descriptor->order;
uint64_t rem_addr = descriptor->des_dst->seg_addr.lval;
uint32_t rkey = descriptor->des_dst->seg_key.key32[0];
uint64_t rem_addr = dst_seg->base.seg_addr.lval;
uint32_t rkey = dst_seg->key;
assert(openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_SEND_USER ||
openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_SEND);
@ -1652,9 +1649,8 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
frag->sr_desc.wr.rdma.remote_addr = rem_addr;
frag->sr_desc.wr.rdma.rkey = rkey;
to_com_frag(frag)->sg_entry.addr =
(uint64_t)(uintptr_t)descriptor->des_src->seg_addr.pval;
to_com_frag(frag)->sg_entry.length = descriptor->des_src->seg_len;
to_com_frag(frag)->sg_entry.addr = src_seg->base.seg_addr.lval;
to_com_frag(frag)->sg_entry.length = src_seg->base.seg_len;
to_com_frag(frag)->endpoint = ep;
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
@ -1665,7 +1661,7 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
/* Setting opcode on a frag constructor isn't enough since prepare_src
* may return send_frag instead of put_frag */
frag->sr_desc.opcode = IBV_WR_RDMA_WRITE;
frag->sr_desc.send_flags = ib_send_flags(descriptor->des_src->seg_len, &(ep->qps[qp]));
frag->sr_desc.send_flags = ib_send_flags(src_seg->base.seg_len, &(ep->qps[qp]));
if(ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr))
return OMPI_ERROR;
@ -1680,11 +1676,13 @@ int mca_btl_openib_get(mca_btl_base_module_t* btl,
mca_btl_base_endpoint_t* ep,
mca_btl_base_descriptor_t* descriptor)
{
mca_btl_openib_segment_t *src_seg = (mca_btl_openib_segment_t *) descriptor->des_src;
mca_btl_openib_segment_t *dst_seg = (mca_btl_openib_segment_t *) descriptor->des_dst;
struct ibv_send_wr* bad_wr;
mca_btl_openib_get_frag_t* frag = to_get_frag(descriptor);
int qp = descriptor->order;
uint64_t rem_addr = descriptor->des_src->seg_addr.lval;
uint32_t rkey = descriptor->des_src->seg_key.key32[0];
uint64_t rem_addr = src_seg->base.seg_addr.lval;
uint32_t rkey = src_seg->key;
assert(openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_RECV_USER);
@ -1733,9 +1731,8 @@ int mca_btl_openib_get(mca_btl_base_module_t* btl,
frag->sr_desc.wr.rdma.remote_addr = rem_addr;
frag->sr_desc.wr.rdma.rkey = rkey;
to_com_frag(frag)->sg_entry.addr =
(uint64_t)(uintptr_t)descriptor->des_dst->seg_addr.pval;
to_com_frag(frag)->sg_entry.length = descriptor->des_dst->seg_len;
to_com_frag(frag)->sg_entry.addr = dst_seg->base.seg_addr.lval;
to_com_frag(frag)->sg_entry.length = dst_seg->base.seg_len;
to_com_frag(frag)->endpoint = ep;
#if HAVE_XRC

Просмотреть файл

@ -480,7 +480,7 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
mca_btl_openib_module_t *obtl = (mca_btl_openib_module_t*)btl;
mca_btl_openib_endpoint_t* ep = to_com_frag(des)->endpoint;
mca_btl_openib_control_header_t *ctl_hdr =
(mca_btl_openib_control_header_t *) to_base_frag(des)->segment.seg_addr.pval;
(mca_btl_openib_control_header_t *) to_base_frag(des)->segment.base.seg_addr.pval;
mca_btl_openib_eager_rdma_header_t *rdma_hdr;
mca_btl_openib_header_coalesced_t *clsc_hdr =
(mca_btl_openib_header_coalesced_t*)(ctl_hdr + 1);
@ -827,6 +827,8 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbfunc = btl_openib_control;
mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbdata = NULL;
openib_btl->super.btl_seg_size = sizeof (mca_btl_openib_segment_t);
/* Check bandwidth configured for this device */
sprintf(param, "bandwidth_%s", ibv_get_device_name(device->ib_dev));
openib_btl->super.btl_bandwidth =
@ -1388,7 +1390,7 @@ static void merge_values(ompi_btl_openib_ini_values_t *target,
static bool inline is_credit_message(const mca_btl_openib_recv_frag_t *frag)
{
mca_btl_openib_control_header_t* chdr =
(mca_btl_openib_control_header_t *) to_base_frag(frag)->segment.seg_addr.pval;
(mca_btl_openib_control_header_t *) to_base_frag(frag)->segment.base.seg_addr.pval;
return (MCA_BTL_TAG_BTL == frag->hdr->tag) &&
(MCA_BTL_OPENIB_CONTROL_CREDITS == chdr->type);
}
@ -1396,7 +1398,7 @@ static bool inline is_credit_message(const mca_btl_openib_recv_frag_t *frag)
static bool inline is_cts_message(const mca_btl_openib_recv_frag_t *frag)
{
mca_btl_openib_control_header_t* chdr =
(mca_btl_openib_control_header_t *) to_base_frag(frag)->segment.seg_addr.pval;
(mca_btl_openib_control_header_t *) to_base_frag(frag)->segment.base.seg_addr.pval;
return (MCA_BTL_TAG_BTL == frag->hdr->tag) &&
(MCA_BTL_OPENIB_CONTROL_CTS == chdr->type);
}
@ -3741,7 +3743,7 @@ static int progress_one_device(mca_btl_openib_device_t *device)
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
frag->hdr = (mca_btl_openib_header_t*)(((char*)frag->ftr) -
size - BTL_OPENIB_FTR_PADDING(size) + sizeof(mca_btl_openib_footer_t));
to_base_frag(frag)->segment.seg_addr.pval =
to_base_frag(frag)->segment.base.seg_addr.pval =
((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
ret = btl_openib_handle_incoming(btl, to_com_frag(frag)->endpoint,

Просмотреть файл

@ -541,7 +541,7 @@ void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
base_des->des_cbdata = NULL;
base_des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
base_des->order = mca_btl_openib_component.credits_qp;
openib_frag->segment.seg_len = sizeof(mca_btl_openib_control_header_t);
openib_frag->segment.base.seg_len = sizeof(mca_btl_openib_control_header_t);
com_frag->endpoint = endpoint;
sc_frag->hdr->tag = MCA_BTL_TAG_BTL;
@ -549,7 +549,7 @@ void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
sc_frag->hdr->credits = 0;
ctl_hdr = (mca_btl_openib_control_header_t*)
openib_frag->segment.seg_addr.pval;
openib_frag->segment.base.seg_addr.pval;
ctl_hdr->type = MCA_BTL_OPENIB_CONTROL_CTS;
/* Send the fragment */
@ -777,13 +777,13 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;;
to_com_frag(frag)->endpoint = endpoint;
frag->hdr->tag = MCA_BTL_TAG_BTL;
to_base_frag(frag)->segment.seg_len =
to_base_frag(frag)->segment.base.seg_len =
sizeof(mca_btl_openib_rdma_credits_header_t);
}
assert(frag->qp_idx == qp);
credits_hdr = (mca_btl_openib_rdma_credits_header_t*)
to_base_frag(frag)->segment.seg_addr.pval;
to_base_frag(frag)->segment.base.seg_addr.pval;
if(OMPI_SUCCESS == acquire_eager_rdma_send_credit(endpoint)) {
do_rdma = true;
} else {
@ -867,12 +867,12 @@ static int mca_btl_openib_endpoint_send_eager_rdma(
to_base_frag(frag)->base.des_cbdata = NULL;
to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
to_base_frag(frag)->segment.seg_len =
to_base_frag(frag)->segment.base.seg_len =
sizeof(mca_btl_openib_eager_rdma_header_t);
to_com_frag(frag)->endpoint = endpoint;
frag->hdr->tag = MCA_BTL_TAG_BTL;
rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)to_base_frag(frag)->segment.seg_addr.pval;
rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)to_base_frag(frag)->segment.base.seg_addr.pval;
rdma_hdr->control.type = MCA_BTL_OPENIB_CONTROL_RDMA;
rdma_hdr->rkey = endpoint->eager_rdma_local.reg->mr->rkey;
rdma_hdr->rdma_start.lval = ompi_ptr_ptol(endpoint->eager_rdma_local.base.pval);
@ -974,7 +974,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
to_base_frag(frag)->type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;
to_com_frag(frag)->endpoint = endpoint;
frag->ftr = (mca_btl_openib_footer_t*)
((char*)to_base_frag(frag)->segment.seg_addr.pval +
((char*)to_base_frag(frag)->segment.base.seg_addr.pval +
mca_btl_openib_component.eager_limit);
MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);

Просмотреть файл

@ -478,13 +478,13 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
mca_btl_openib_send_frag_t *frag, const bool rdma)
{
mca_btl_openib_module_t *openib_btl = ep->endpoint_btl;
mca_btl_base_segment_t *seg = &to_base_frag(frag)->segment;
mca_btl_openib_segment_t *seg = &to_base_frag(frag)->segment;
struct ibv_sge *sg = &to_com_frag(frag)->sg_entry;
struct ibv_send_wr *sr_desc = &to_out_frag(frag)->sr_desc;
struct ibv_send_wr *bad_wr;
int qp = to_base_frag(frag)->base.order;
sg->length = seg->seg_len + sizeof(mca_btl_openib_header_t) +
sg->length = seg->base.seg_len + sizeof(mca_btl_openib_header_t) +
(rdma ? sizeof(mca_btl_openib_footer_t) : 0) + frag->coalesced_length;
sr_desc->send_flags = ib_send_flags(sg->length, &(ep->qps[qp]));

Просмотреть файл

@ -675,12 +675,12 @@ static void mca_btl_openib_endpoint_notify(mca_btl_base_endpoint_t* endpoint, ui
to_base_frag(frag)->base.des_cbdata = NULL;
to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
to_base_frag(frag)->segment.seg_len =
to_base_frag(frag)->segment.base.seg_len =
sizeof(mca_btl_openib_broken_connection_header_t);
to_com_frag(frag)->endpoint = newep;
frag->hdr->tag = MCA_BTL_TAG_BTL;
bc_hdr = (mca_btl_openib_broken_connection_header_t*)to_base_frag(frag)->segment.seg_addr.pval;
bc_hdr = (mca_btl_openib_broken_connection_header_t*)to_base_frag(frag)->segment.base.seg_addr.pval;
bc_hdr->control.type = type;
bc_hdr->lid = endpoint->endpoint_btl->port_info.lid;
bc_hdr->subnet_id = endpoint->endpoint_btl->port_info.subnet_id;
@ -717,10 +717,10 @@ static void dump_local_rdma_frags(mca_btl_openib_endpoint_t * endpoint) {
frag->hdr = (mca_btl_openib_header_t*)(((char*)frag->ftr) -
size + sizeof(mca_btl_openib_footer_t));
to_base_frag(frag)->segment.seg_addr.pval =
to_base_frag(frag)->segment.base.seg_addr.pval =
((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
chdr = to_base_frag(frag)->segment.seg_addr.pval;
chdr = to_base_frag(frag)->segment.base.seg_addr.pval;
if ((MCA_BTL_TAG_BTL == frag->hdr->tag) &&
(MCA_BTL_OPENIB_CONTROL_CREDITS == chdr->type)) {
opal_output(0, "tag[%d] is credit message", i);

Просмотреть файл

@ -59,7 +59,7 @@ static void com_constructor(mca_btl_openib_com_frag_t *frag)
if(reg) {
frag->sg_entry.lkey = reg->mr->lkey;
base_frag->segment.seg_key.key32[0] = reg->mr->lkey;
base_frag->segment.key = reg->mr->lkey;
}
}
@ -67,7 +67,7 @@ static void out_constructor(mca_btl_openib_out_frag_t *frag)
{
mca_btl_openib_frag_t *base_frag = to_base_frag(frag);
base_frag->base.des_src = &base_frag->segment;
base_frag->base.des_src = &base_frag->segment.base;
base_frag->base.des_src_cnt = 1;
base_frag->base.des_dst = NULL;
base_frag->base.des_dst_cnt = 0;
@ -84,7 +84,7 @@ static void in_constructor(mca_btl_openib_in_frag_t *frag)
{
mca_btl_openib_frag_t *base_frag = to_base_frag(frag);
base_frag->base.des_dst = &base_frag->segment;
base_frag->base.des_dst = &base_frag->segment.base;
base_frag->base.des_dst_cnt = 1;
base_frag->base.des_src = NULL;
base_frag->base.des_src_cnt = 0;
@ -101,7 +101,7 @@ static void send_constructor(mca_btl_openib_send_frag_t *frag)
(((unsigned char*)base_frag->base.super.ptr) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t));
base_frag->segment.seg_addr.pval = frag->hdr + 1;
base_frag->segment.base.seg_addr.pval = frag->hdr + 1;
to_com_frag(frag)->sg_entry.addr = (uint64_t)(uintptr_t)frag->hdr;
frag->coalesced_length = 0;
OBJ_CONSTRUCT(&frag->coalesced_frags, opal_list_t);
@ -114,7 +114,7 @@ static void recv_constructor(mca_btl_openib_recv_frag_t *frag)
base_frag->type = MCA_BTL_OPENIB_FRAG_RECV;
frag->hdr = (mca_btl_openib_header_t*)base_frag->base.super.ptr;
base_frag->segment.seg_addr.pval =
base_frag->segment.base.seg_addr.pval =
((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
to_com_frag(frag)->sg_entry.addr = (uint64_t)(uintptr_t)frag->hdr;
@ -129,7 +129,7 @@ static void send_control_constructor(mca_btl_openib_send_control_frag_t *frag)
to_base_frag(frag)->type = MCA_BTL_OPENIB_FRAG_CONTROL;
/* adjusting headers because there is no coalesce header in control messages */
frag->hdr = frag->chdr;
to_base_frag(frag)->segment.seg_addr.pval = frag->hdr + 1;
to_base_frag(frag)->segment.base.seg_addr.pval = frag->hdr + 1;
to_com_frag(frag)->sg_entry.addr = (uint64_t)(uintptr_t)frag->hdr;
}
@ -157,7 +157,7 @@ static void coalesced_constructor(mca_btl_openib_coalesced_frag_t *frag)
base_frag->type = MCA_BTL_OPENIB_FRAG_COALESCED;
base_frag->base.des_src = &base_frag->segment;
base_frag->base.des_src = &base_frag->segment.base;
base_frag->base.des_src_cnt = 1;
base_frag->base.des_dst = NULL;
base_frag->base.des_dst_cnt = 0;

Просмотреть файл

@ -284,10 +284,15 @@ typedef enum mca_btl_openib_frag_type_t mca_btl_openib_frag_type_t;
* IB fragment derived type.
*/
/**
 * OpenIB (verbs) BTL segment: the generic base segment plus the 32-bit
 * InfiniBand memory-registration key for this region (set from mr->lkey
 * locally and mr->rkey when exported for remote RDMA put/get).
 */
typedef struct mca_btl_openib_segment_t {
mca_btl_base_segment_t base;
uint32_t key;
} mca_btl_openib_segment_t;
/* base openib frag */
typedef struct mca_btl_openib_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment;
mca_btl_openib_segment_t segment;
mca_btl_openib_frag_type_t type;
ompi_free_list_t* list;
} mca_btl_openib_frag_t;

Просмотреть файл

@ -439,7 +439,7 @@ int ompi_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint)
/* Copy the lkey where it needs to go */
endpoint->endpoint_cts_frag.super.sg_entry.lkey =
endpoint->endpoint_cts_frag.super.super.segment.seg_key.key32[0] =
endpoint->endpoint_cts_frag.super.super.segment.key =
endpoint->endpoint_cts_mr->lkey;
endpoint->endpoint_cts_frag.super.sg_entry.length = length;

Просмотреть файл

@ -54,6 +54,7 @@ mca_btl_portals_module_t mca_btl_portals_module = {
0, /* latency */
0, /* bandwidth */
0, /* btl flags */
0, /* btl segment size */
mca_btl_portals_add_procs,
mca_btl_portals_del_procs,
@ -242,11 +243,11 @@ mca_btl_portals_alloc(struct mca_btl_base_module_t* btl_base,
if (size <= mca_btl_portals_module.super.btl_eager_limit) {
OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(&mca_btl_portals_module, frag, rc);
if (OMPI_SUCCESS != rc) return NULL;
frag->segments[0].seg_len = size;
frag->segments[0].base.seg_len = size;
} else {
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(&mca_btl_portals_module, frag, rc);
if (OMPI_SUCCESS != rc) return NULL;
frag->segments[0].seg_len =
frag->segments[0].base.seg_len =
size <= mca_btl_portals_module.super.btl_max_send_size ?
size : mca_btl_portals_module.super.btl_max_send_size ;
}
@ -323,7 +324,7 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*) frag->segments[0].seg_addr.pval + reserve;
iov.iov_base = (unsigned char*) frag->segments[0].base.seg_addr.pval + reserve;
ret = opal_convertor_pack(convertor, &iov, &iov_count,
&max_data );
*size = max_data;
@ -331,7 +332,7 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
return NULL;
}
frag->segments[0].seg_len = max_data + reserve;
frag->segments[0].base.seg_len = max_data + reserve;
frag->base.des_src_cnt = 1;
} else {
@ -356,10 +357,9 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
frag->segments[0].seg_len = max_data;
frag->segments[0].seg_addr.pval = iov.iov_base;
frag->segments[0].seg_key.key64[0] =
OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
frag->segments[0].base.seg_len = max_data;
frag->segments[0].base.seg_addr.pval = iov.iov_base;
frag->segments[0].key = OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
frag->base.des_src_cnt = 1;
/* either a put or get. figure out which later */
@ -367,13 +367,13 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
"rdma src posted for frag 0x%lx, callback 0x%lx, bits %"PRIu64", flags say %d" ,
(unsigned long) frag,
(unsigned long) frag->base.des_cbfunc,
frag->segments[0].seg_key.key64[0], flags));
frag->segments[0].key, flags));
/* create a match entry */
ret = PtlMEAttach(mca_btl_portals_module.portals_ni_h,
OMPI_BTL_PORTALS_RDMA_TABLE_ID,
*((mca_btl_base_endpoint_t*) peer),
frag->segments[0].seg_key.key64[0], /* match */
frag->segments[0].key, /* match */
0, /* ignore */
PTL_UNLINK,
PTL_INS_AFTER,
@ -387,8 +387,8 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
}
/* setup the memory descriptor */
md.start = frag->segments[0].seg_addr.pval;
md.length = frag->segments[0].seg_len;
md.start = frag->segments[0].base.seg_addr.pval;
md.length = frag->segments[0].base.seg_len;
md.threshold = PTL_MD_THRESH_INF;
md.max_size = 0;
md.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
@ -448,10 +448,9 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl_base,
return NULL;
}
frag->segments[0].seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segments[0].seg_addr.pval) );
frag->segments[0].seg_key.key64[0] =
OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
frag->segments[0].base.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segments[0].base.seg_addr.pval) );
frag->segments[0].key = OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->base.des_dst = frag->segments;
@ -462,14 +461,14 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl_base,
"rdma dest posted for frag 0x%lx, callback 0x%lx, bits %" PRIu64 " flags %d",
(unsigned long) frag,
(unsigned long) frag->base.des_cbfunc,
frag->segments[0].seg_key.key64[0],
frag->segments[0].key,
flags));
/* create a match entry */
ret = PtlMEAttach(mca_btl_portals_module.portals_ni_h,
OMPI_BTL_PORTALS_RDMA_TABLE_ID,
*((mca_btl_base_endpoint_t*) peer),
frag->segments[0].seg_key.key64[0], /* match */
frag->segments[0].key, /* match */
0, /* ignore */
PTL_UNLINK,
PTL_INS_AFTER,
@ -483,8 +482,8 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl_base,
}
/* setup the memory descriptor. */
md.start = frag->segments[0].seg_addr.pval;
md.length = frag->segments[0].seg_len;
md.start = frag->segments[0].base.seg_addr.pval;
md.length = frag->segments[0].base.seg_len;
md.threshold = PTL_MD_THRESH_INF;
md.max_size = 0;
md.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;

Просмотреть файл

@ -148,6 +148,7 @@ mca_btl_portals_component_open(void)
mca_btl_portals_module.super.btl_flags =
MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_RDMA_MATCHED;
mca_btl_portals_module.super.btl_seg_size = sizeof (mca_btl_portals_segment_t);
mca_btl_portals_module.super.btl_bandwidth = 1000;
mca_btl_portals_module.super.btl_latency = 0;
@ -317,6 +318,7 @@ mca_btl_portals_component_progress(void)
mca_btl_portals_frag_t *frag = NULL;
mca_btl_portals_recv_block_t *block = NULL;
mca_btl_base_tag_t tag;
mca_btl_base_segment_t seg[2];
if (0 == mca_btl_portals_module.portals_num_procs) {
return 0;
@ -421,11 +423,12 @@ mca_btl_portals_component_progress(void)
));
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,"received %d bytes \n", (int) ev.mlength));
frag->segments[0].seg_addr.pval = &frag->data;
frag->segments[0].seg_len = header_size;
frag->base.des_dst = seg;
seg[0].seg_addr.pval = &frag->data;
seg[0].seg_len = header_size;
if(ev.mlength) {
frag->segments[1].seg_addr.pval = ((((char*) ev.md.start) + ev.offset));
frag->segments[1].seg_len = ev.mlength;
seg[1].seg_addr.pval = ((((char*) ev.md.start) + ev.offset));
seg[1].seg_len = ev.mlength;
frag->base.des_dst_cnt = 2;
} else {
frag->base.des_dst_cnt = 1;
@ -433,8 +436,8 @@ mca_btl_portals_component_progress(void)
} else {
/* if we ever make this thread hot, need to do
something with the receive fragments */
frag->segments[0].seg_addr.pval = (((char*) ev.md.start) + ev.offset);
frag->segments[0].seg_len = ev.mlength;
seg[0].seg_addr.pval = (((char*) ev.md.start) + ev.offset);
seg[0].seg_len = ev.mlength;
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"received send fragment 0x%lx (thresh: %d, length %d)",
@ -454,6 +457,9 @@ mca_btl_portals_component_progress(void)
block->full = true;
}
/* NTH: is it ok to overwrite this. All callbacks should expect base segments */
frag->base.des_dst = seg;
mca_btl_base_active_message_trigger[tag].cbfunc(
&mca_btl_portals_module.super,
tag,

Просмотреть файл

@ -32,9 +32,9 @@ mca_btl_portals_frag_common_send_constructor(mca_btl_portals_frag_t* frag)
frag->base.des_src = frag->segments;
frag->base.des_src_cnt = 2;
frag->segments[0].seg_addr.pval = frag + 1;
frag->segments[0].seg_len = frag->size;
frag->segments[0].seg_key.key64[0] = 0;
frag->segments[0].base.seg_addr.pval = frag + 1;
frag->segments[0].base.seg_len = frag->size;
frag->segments[0].key = 0;
frag->md_h = PTL_INVALID_HANDLE;
}

Просмотреть файл

@ -23,12 +23,18 @@
BEGIN_C_DECLS
/** Portals-specific BTL segment: extends the base segment with the
 *  match bits used to locate the remote RDMA region. Registered as
 *  btl_seg_size so the PML copies the full subclass across the wire. */
struct mca_btl_portals_segment_t {
/** Base segment (address + length); MUST be first so the segment can
 *  be treated as a mca_btl_base_segment_t by generic code. */
mca_btl_base_segment_t base;
/** Portals match bits identifying the ME posted for this region
 *  (generated from portals_rdma_key in prepare_src/prepare_dst). */
ptl_match_bits_t key;
};
typedef struct mca_btl_portals_segment_t mca_btl_portals_segment_t;
/**
* Portals send fragment derived type
*/
struct mca_btl_portals_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segments[1];
mca_btl_portals_segment_t segments[1];
/* needed for retransmit case */
struct mca_btl_base_endpoint_t *endpoint;
/* needed for retransmit case */

Просмотреть файл

@ -32,14 +32,14 @@ mca_btl_portals_put(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor)
{
mca_btl_portals_segment_t *dst_seg = (mca_btl_portals_segment_t *) descriptor->des_dst;
mca_btl_portals_frag_t *frag = (mca_btl_portals_frag_t*) descriptor;
int ret;
unsigned char hdr_data[8];
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"PtlPut (rdma) fragment %lx, bits %" PRIx64,
(unsigned long) frag,
frag->base.des_dst[0].seg_key.key64[0]));
(unsigned long) frag, dst_seg->key));
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
assert(frag->md_h != PTL_INVALID_HANDLE);
@ -55,7 +55,7 @@ mca_btl_portals_put(struct mca_btl_base_module_t* btl_base,
*((mca_btl_base_endpoint_t*) btl_peer),
OMPI_BTL_PORTALS_RDMA_TABLE_ID,
0, /* ac_index - not used*/
frag->base.des_dst[0].seg_key.key64[0], /* match bits */
dst_seg->key, /* match bits */
0, /* remote offset - not used */
*((ptl_hdr_data_t*) hdr_data)); /* hdr_data: tag */
if (ret != PTL_OK) {
@ -73,13 +73,13 @@ mca_btl_portals_get(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor)
{
mca_btl_portals_segment_t *src_seg = (mca_btl_portals_segment_t *) descriptor->des_src;
mca_btl_portals_frag_t *frag = (mca_btl_portals_frag_t*) descriptor;
int ret;
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"PtlGet (rdma) fragment %lx, bits %" PRIx64,
(unsigned long) frag,
frag->base.des_src[0].seg_key.key64[0]));
(unsigned long) frag, src_seg->key));
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
assert(frag->md_h != PTL_INVALID_HANDLE);
@ -91,7 +91,7 @@ mca_btl_portals_get(struct mca_btl_base_module_t* btl_base,
*((mca_btl_base_endpoint_t*) btl_peer),
OMPI_BTL_PORTALS_RDMA_TABLE_ID,
0, /* ac_index - not used*/
frag->base.des_src[0].seg_key.key64[0], /* match bits */
src_seg->key, /* match bits */
0); /* remote offset - not used */
if (ret != PTL_OK) {
opal_output(mca_btl_portals_component.portals_output,

Просмотреть файл

@ -63,9 +63,9 @@ mca_btl_portals_send(struct mca_btl_base_module_t* btl_base,
if (frag->md_h == PTL_INVALID_HANDLE) {
/* setup the send - always describe entire fragment */
mca_btl_portals_module.md_send.start = frag->segments[0].seg_addr.pval;
mca_btl_portals_module.md_send.start = frag->segments[0].base.seg_addr.pval;
mca_btl_portals_module.md_send.length =
0 == frag->size ? frag->segments[0].seg_len : frag->size;
0 == frag->size ? frag->segments[0].base.seg_len : frag->size;
#if OPAL_ENABLE_DEBUG
mca_btl_portals_module.md_send.options =
PTL_MD_EVENT_START_DISABLE;
@ -94,19 +94,19 @@ mca_btl_portals_send(struct mca_btl_base_module_t* btl_base,
"fragment info:\n"
"\tstart: 0x%lx\n"
"\tlen: %d",
(unsigned long) frag->segments[0].seg_addr.pval,
frag->segments[0].seg_len));
(unsigned long) frag->segments[0].base.seg_addr.pval,
frag->segments[0].base.seg_len));
ret = PtlPutRegion(frag->md_h, /* memory descriptor */
0, /* fragment offset */
frag->segments[0].seg_len, /* fragment length */
ret = PtlPutRegion(frag->md_h, /* memory descriptor */
0, /* fragment offset */
frag->segments[0].base.seg_len, /* fragment length */
(mca_btl_portals_component.portals_need_ack ? PTL_ACK_REQ : PTL_NO_ACK_REQ),
*((mca_btl_base_endpoint_t*) endpoint),
OMPI_BTL_PORTALS_SEND_TABLE_ID,
0, /* ac_index - not used */
0, /* match bits */
0, /* remote offset - not used */
*((ptl_hdr_data_t*) hdr_data)); /* hdr_data: tag */
0, /* ac_index - not used */
0, /* match bits */
0, /* remote offset - not used */
*((ptl_hdr_data_t*) hdr_data)); /* hdr_data: tag */
if (ret != PTL_OK) {
opal_output(mca_btl_portals_component.portals_output,
"send: PtlPut failed with error %d", ret);
@ -167,7 +167,7 @@ int mca_btl_portals_sendi(struct mca_btl_base_module_t* btl_base,
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
return OMPI_ERR_RESOURCE_BUSY;
}
frag->segments[0].seg_len = payload_size;
frag->segments[0].base.seg_len = payload_size;
frag->base.des_src_cnt = 1;
frag->base.des_flags = flags;
frag->base.order = MCA_BTL_NO_ORDER;
@ -175,7 +175,7 @@ int mca_btl_portals_sendi(struct mca_btl_base_module_t* btl_base,
if(payload_size) {
/* pack the data into the supplied buffer */
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)frag->segments[0].seg_addr.pval);
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)frag->segments[0].base.seg_addr.pval);
iov.iov_len = max_data = payload_size;
iov_count = 1;
@ -203,9 +203,9 @@ int mca_btl_portals_sendi(struct mca_btl_base_module_t* btl_base,
if (frag->md_h == PTL_INVALID_HANDLE) {
/* setup the send - always describe entire fragment */
mca_btl_portals_module.md_send.start = frag->segments[0].seg_addr.pval;
mca_btl_portals_module.md_send.start = frag->segments[0].base.seg_addr.pval;
mca_btl_portals_module.md_send.length =
0 == frag->size ? frag->segments[0].seg_len : frag->size;
0 == frag->size ? frag->segments[0].base.seg_len : frag->size;
#if OPAL_ENABLE_DEBUG
mca_btl_portals_module.md_send.options =
PTL_MD_EVENT_START_DISABLE;
@ -234,19 +234,19 @@ int mca_btl_portals_sendi(struct mca_btl_base_module_t* btl_base,
"fragment info:\n"
"\tstart: 0x%lx\n"
"\tlen: %d",
(unsigned long) frag->segments[0].seg_addr.pval,
frag->segments[0].seg_len));
(unsigned long) frag->segments[0].base.seg_addr.pval,
frag->segments[0].base.seg_len));
ret = PtlPutRegion(frag->md_h, /* memory descriptor */
0, /* fragment offset */
frag->segments[0].seg_len, /* fragment length */
ret = PtlPutRegion(frag->md_h, /* memory descriptor */
0, /* fragment offset */
frag->segments[0].base.seg_len, /* fragment length */
(mca_btl_portals_component.portals_need_ack ? PTL_ACK_REQ : PTL_NO_ACK_REQ),
*((mca_btl_base_endpoint_t*) endpoint),
OMPI_BTL_PORTALS_SEND_TABLE_ID,
0, /* ac_index - not used */
0, /* ac_index - not used */
*((ptl_match_bits_t*) match_bits), /* match bits */
0, /* remote offset - not used */
*((ptl_hdr_data_t*) hdr_data)); /* hdr_data: tag */
0, /* remote offset - not used */
*((ptl_hdr_data_t*) hdr_data)); /* hdr_data: tag */
if (ret != PTL_OK) {
opal_output(mca_btl_portals_component.portals_output,

Просмотреть файл

@ -44,6 +44,7 @@ mca_btl_sctp_module_t mca_btl_sctp_module = {
0, /* latency */
0, /* bandwidth */
0, /* flags */
0, /* segment size */
mca_btl_sctp_add_procs,
mca_btl_sctp_del_procs,
NULL,

Просмотреть файл

@ -217,6 +217,7 @@ static int mca_btl_sctp_component_register(void)
MCA_BTL_FLAGS_NEED_CSUM |
MCA_BTL_FLAGS_NEED_ACK |
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
mca_btl_sctp_module.super.btl_seg_size = sizeof (mca_btl_base_segment_t);
mca_btl_sctp_module.super.btl_bandwidth = 100;
mca_btl_sctp_module.super.btl_latency = 100;
mca_btl_base_param_register(&mca_btl_sctp_component.super.btl_version,

Просмотреть файл

@ -46,6 +46,7 @@ mca_btl_base_module_t mca_btl_self = {
0, /* btl_latency */
0, /* btl_bandwidth */
0, /* btl flags */
0, /* btl segment size */
mca_btl_self_add_procs,
mca_btl_self_del_procs,
NULL,
@ -235,7 +236,7 @@ mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl,
frag->base.des_flags = flags;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->segment.seg_key.key64[0] = (uint64_t)(intptr_t)convertor;
return &frag->base;
}
@ -264,7 +265,6 @@ mca_btl_self_prepare_dst( struct mca_btl_base_module_t* btl,
/* setup descriptor to point directly to user buffer */
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.seg_addr.pval) );
frag->segment.seg_len = reserve + max_data;
frag->segment.seg_key.key64[0] = (uint64_t)(intptr_t)convertor;
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
frag->base.des_flags = flags;

Просмотреть файл

@ -90,6 +90,7 @@ int mca_btl_self_component_open(void)
mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX;
mca_btl_self.btl_min_rdma_pipeline_size = 0;
mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_self.btl_seg_size = sizeof (mca_btl_base_segment_t);
mca_btl_self.btl_bandwidth = 100;
mca_btl_self.btl_latency = 0;
mca_btl_base_param_register(&mca_btl_self_component.super.btl_version,

Просмотреть файл

@ -75,6 +75,7 @@ mca_btl_sm_t mca_btl_sm = {
0, /* btl_latency */
0, /* btl_bandwidth */
0, /* btl flags */
0, /* btl segment size */
mca_btl_sm_add_procs,
mca_btl_sm_del_procs,
NULL,
@ -629,7 +630,7 @@ extern mca_btl_base_descriptor_t* mca_btl_sm_alloc(
}
if (OPAL_LIKELY(frag != NULL)) {
frag->segment.seg_len = size;
frag->segment.base.seg_len = size;
frag->base.des_flags = flags;
}
return (mca_btl_base_descriptor_t*)frag;
@ -693,15 +694,14 @@ struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src(
}
iov.iov_len = max_data;
iov.iov_base =
(IOVBASE_TYPE*)(((unsigned char*)(frag->segment.seg_addr.pval)) +
reserve);
(IOVBASE_TYPE*)(frag->segment.base.seg_addr.lval + reserve);
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
if( OPAL_UNLIKELY(rc < 0) ) {
MCA_BTL_SM_FRAG_RETURN(frag);
return NULL;
}
frag->segment.seg_len = reserve + max_data;
frag->segment.base.seg_len = reserve + max_data;
#if OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
} else {
#if OMPI_BTL_SM_HAVE_KNEM
@ -719,8 +719,8 @@ struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src(
MCA_BTL_SM_FRAG_RETURN(frag);
return NULL;
}
frag->segment.seg_addr.pval = iov.iov_base;
frag->segment.seg_len = max_data;
frag->segment.base.seg_addr.pval = iov.iov_base;
frag->segment.base.seg_len = max_data;
#if OMPI_BTL_SM_HAVE_KNEM
if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) {
@ -733,20 +733,20 @@ struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src(
if (OPAL_UNLIKELY(ioctl(sm_btl->knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) {
return NULL;
}
frag->segment.seg_key.key64[0] = knem_cr.cookie;
frag->segment.key = knem_cr.cookie;
}
#endif /* OMPI_BTL_SM_HAVE_KNEM */
#if OMPI_BTL_SM_HAVE_CMA
if (OPAL_LIKELY(mca_btl_sm_component.use_cma)) {
/* Encode the pid as the key */
frag->segment.seg_key.key64[0] = getpid();
frag->segment.key = getpid();
}
#endif /* OMPI_BTL_SM_HAVE_CMA */
}
#endif /* OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA */
frag->base.des_src = &(frag->segment);
frag->base.des_src = &(frag->segment.base);
frag->base.des_src_cnt = 1;
frag->base.order = MCA_BTL_NO_ORDER;
frag->base.des_dst = NULL;
@ -759,8 +759,8 @@ struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src(
#if 0
#define MCA_BTL_SM_TOUCH_DATA_TILL_CACHELINE_BOUNDARY(sm_frag) \
do { \
char* _memory = (char*)(sm_frag)->segment.seg_addr.pval + \
(sm_frag)->segment.seg_len; \
char* _memory = (char*)(sm_frag)->segment.base.seg_addr.pval + \
(sm_frag)->segment.base.seg_len; \
int* _intmem; \
size_t align = (intptr_t)_memory & 0xFUL; \
switch( align & 0x3 ) { \
@ -825,7 +825,7 @@ int mca_btl_sm_sendi( struct mca_btl_base_module_t* btl,
}
/* fill in fragment fields */
frag->segment.seg_len = length;
frag->segment.base.seg_len = length;
frag->hdr->len = length;
assert( 0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) );
frag->base.des_flags = flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP; /* why do any flags matter here other than OWNERSHIP? */
@ -833,7 +833,7 @@ int mca_btl_sm_sendi( struct mca_btl_base_module_t* btl,
frag->endpoint = endpoint;
/* write the match header (with MPI comm/tag/etc. info) */
memcpy( frag->segment.seg_addr.pval, header, header_size );
memcpy( frag->segment.base.seg_addr.pval, header, header_size );
/* write the message data if there is any */
/*
@ -844,7 +844,7 @@ int mca_btl_sm_sendi( struct mca_btl_base_module_t* btl,
struct iovec iov;
uint32_t iov_count;
/* pack the data into the supplied buffer */
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)frag->segment.seg_addr.pval + header_size);
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)frag->segment.base.seg_addr.pval + header_size);
iov.iov_len = max_data = payload_size;
iov_count = 1;
@ -892,7 +892,7 @@ int mca_btl_sm_send( struct mca_btl_base_module_t* btl,
}
/* available header space */
frag->hdr->len = frag->segment.seg_len;
frag->hdr->len = frag->segment.base.seg_len;
/* type of message, pt-2-pt, one-sided, etc */
frag->hdr->tag = tag;
@ -936,8 +936,8 @@ struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_dst(
return NULL;
}
frag->segment.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.seg_addr.pval) );
frag->segment.base.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.base.seg_addr.pval) );
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
@ -961,8 +961,8 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
int btl_ownership;
mca_btl_sm_t* sm_btl = (mca_btl_sm_t*) btl;
mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)des;
mca_btl_base_segment_t *src = des->des_src;
mca_btl_base_segment_t *dst = des->des_dst;
mca_btl_sm_segment_t *src = (mca_btl_sm_segment_t *) des->des_src;
mca_btl_sm_segment_t *dst = (mca_btl_sm_segment_t *) des->des_dst;
#if OMPI_BTL_SM_HAVE_KNEM
if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) {
struct knem_cmd_inline_copy icopy;
@ -970,11 +970,11 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
/* Fill in the ioctl data fields. There's no async completion, so
we don't need to worry about getting a slot, etc. */
recv_iovec.base = (uintptr_t) dst->seg_addr.pval;
recv_iovec.len = dst->seg_len;
recv_iovec.base = (uintptr_t) dst->base.seg_addr.pval;
recv_iovec.len = dst->base.seg_len;
icopy.local_iovec_array = (uintptr_t)&recv_iovec;
icopy.local_iovec_nr = 1;
icopy.remote_cookie = src->seg_key.key64[0];
icopy.remote_cookie = src->key;
icopy.remote_offset = 0;
icopy.write = 0;
@ -983,7 +983,7 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
value is 0 (i.e., the MCA param was set to 0), the segment size
will never be larger than it, so DMA will never be used. */
icopy.flags = 0;
if (mca_btl_sm_component.knem_dma_min <= dst->seg_len) {
if (mca_btl_sm_component.knem_dma_min <= dst->base.seg_len) {
icopy.flags = mca_btl_sm_component.knem_dma_flag;
}
/* synchronous flags only, no need to specify icopy.async_status_index */
@ -1007,17 +1007,17 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
pid_t remote_pid;
int val;
remote_address = (char *) src->seg_addr.pval;
remote_length = src->seg_len;
remote_address = (char *) src->base.seg_addr.pval;
remote_length = src->base.seg_len;
local_address = (char *) dst->seg_addr.pval;
local_length = dst->seg_len;
local_address = (char *) dst->base.seg_addr.pval;
local_length = dst->base.seg_len;
remote_pid = src->seg_key.key64[0];
remote.iov_base = src->seg_addr.pval;
remote.iov_len = src->seg_len;
local.iov_base = dst->seg_addr.pval;
local.iov_len = dst->seg_len;
remote_pid = src->key;
remote.iov_base = src->base.seg_addr.pval;
remote.iov_len = src->base.seg_len;
local.iov_base = dst->base.seg_addr.pval;
local.iov_len = dst->base.seg_len;
val = process_vm_readv(remote_pid, &local, 1, &remote, 1, 0);
@ -1068,8 +1068,8 @@ int mca_btl_sm_get_async(struct mca_btl_base_module_t* btl,
int btl_ownership;
mca_btl_sm_t* sm_btl = (mca_btl_sm_t*) btl;
mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)des;
mca_btl_base_segment_t *src = des->des_src;
mca_btl_base_segment_t *dst = des->des_dst;
mca_btl_sm_segment_t *src = (mca_btl_sm_segment_t *) des->des_src;
mca_btl_sm_segment_t *dst = (mca_btl_sm_segment_t *) des->des_dst;
struct knem_cmd_inline_copy icopy;
struct knem_cmd_param_iovec recv_iovec;
@ -1082,8 +1082,8 @@ int mca_btl_sm_get_async(struct mca_btl_base_module_t* btl,
/* We have a slot, so fill in the data fields. Bump the
first_avail and num_used counters. */
recv_iovec.base = (uintptr_t) dst->seg_addr.pval;
recv_iovec.len = dst->seg_len;
recv_iovec.base = (uintptr_t) dst->base.seg_addr.pval;
recv_iovec.len = dst->base.seg_len;
icopy.local_iovec_array = (uintptr_t)&recv_iovec;
icopy.local_iovec_nr = 1;
icopy.write = 0;
@ -1093,13 +1093,13 @@ int mca_btl_sm_get_async(struct mca_btl_base_module_t* btl,
sm_btl->knem_status_first_avail = 0;
}
++sm_btl->knem_status_num_used;
icopy.remote_cookie = src->seg_key.key64[0];
icopy.remote_cookie = src->key;
icopy.remote_offset = 0;
/* Use the DMA flag if knem supports it *and* the segment length
is greater than the cutoff */
icopy.flags = KNEM_FLAG_ASYNCDMACOMPLETE;
if (mca_btl_sm_component.knem_dma_min <= dst->seg_len) {
if (mca_btl_sm_component.knem_dma_min <= dst->base.seg_len) {
icopy.flags = mca_btl_sm_component.knem_dma_flag;
}

Просмотреть файл

@ -223,6 +223,7 @@ static int sm_register(void)
}
#endif
mca_btl_sm.super.btl_seg_size = sizeof (mca_btl_sm_segment_t);
mca_btl_sm.super.btl_bandwidth = 9000; /* Mbs */
mca_btl_sm.super.btl_latency = 1; /* Microsecs */
@ -608,6 +609,7 @@ void btl_sm_process_pending_sends(struct mca_btl_base_endpoint_t *ep)
int mca_btl_sm_component_progress(void)
{
/* local variables */
mca_btl_base_segment_t seg;
mca_btl_sm_frag_t *frag;
mca_btl_sm_frag_t Frag;
sm_fifo_t *fifo = NULL;
@ -670,11 +672,10 @@ int mca_btl_sm_component_progress(void)
#endif
/* recv upcall */
reg = mca_btl_base_active_message_trigger + hdr->tag;
Frag.segment.seg_addr.pval = ((char*)hdr) +
sizeof(mca_btl_sm_hdr_t);
Frag.segment.seg_len = hdr->len;
seg.seg_addr.pval = ((char *)hdr) + sizeof(mca_btl_sm_hdr_t);
seg.seg_len = hdr->len;
Frag.base.des_dst_cnt = 1;
Frag.base.des_dst = &(Frag.segment);
Frag.base.des_dst = &seg;
reg->cbfunc(&mca_btl_sm.super, hdr->tag, &(Frag.base),
reg->cbdata);
/* return the fragment */

Просмотреть файл

@ -26,14 +26,14 @@ static inline void mca_btl_sm_frag_common_constructor(mca_btl_sm_frag_t* frag)
if(frag->hdr != NULL) {
frag->hdr->frag = (mca_btl_sm_frag_t*)((uintptr_t)frag |
MCA_BTL_SM_FRAG_ACK);
frag->segment.seg_addr.pval = ((char*)frag->hdr) +
frag->segment.base.seg_addr.pval = ((char*)frag->hdr) +
sizeof(mca_btl_sm_hdr_t);
frag->hdr->my_smp_rank = mca_btl_sm_component.my_smp_rank;
}
frag->segment.seg_len = frag->size;
frag->base.des_src = &frag->segment;
frag->segment.base.seg_len = frag->size;
frag->base.des_src = &frag->segment.base;
frag->base.des_src_cnt = 1;
frag->base.des_dst = &frag->segment;
frag->base.des_dst = &frag->segment.base;
frag->base.des_dst_cnt = 1;
frag->base.des_flags = 0;
}

Просмотреть файл

@ -45,12 +45,20 @@ struct mca_btl_sm_hdr_t {
};
typedef struct mca_btl_sm_hdr_t mca_btl_sm_hdr_t;
/** Shared-memory BTL segment: extends the base segment with the key
 *  needed for single-copy transfers. Registered as btl_seg_size. */
struct mca_btl_sm_segment_t {
/** Base segment (address + length); MUST be first so the segment can
 *  be used where a mca_btl_base_segment_t is expected. */
mca_btl_base_segment_t base;
#if OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
/** knem region cookie, or the exporting process pid when CMA
 *  (process_vm_readv) is used instead. */
uint64_t key;
#endif
};
typedef struct mca_btl_sm_segment_t mca_btl_sm_segment_t;
/**
* shared memory send fragment derived type.
*/
struct mca_btl_sm_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment;
mca_btl_sm_segment_t segment;
struct mca_btl_base_endpoint_t *endpoint;
size_t size;
/* pointer written to the FIFO, this is the base of the shared memory region */

Просмотреть файл

@ -74,6 +74,7 @@ mca_btl_smcuda_t mca_btl_smcuda = {
0, /* btl_latency */
0, /* btl_bandwidth */
0, /* btl flags */
0, /* btl segment size */
mca_btl_smcuda_add_procs,
mca_btl_smcuda_del_procs,
NULL,
@ -654,7 +655,7 @@ extern mca_btl_base_descriptor_t* mca_btl_smcuda_alloc(
}
if (OPAL_LIKELY(frag != NULL)) {
frag->segment.seg_len = size;
frag->segment.base.seg_len = size;
frag->base.des_flags = flags;
}
return (mca_btl_base_descriptor_t*)frag;
@ -714,15 +715,14 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
}
iov.iov_len = max_data;
iov.iov_base =
(IOVBASE_TYPE*)(((unsigned char*)(frag->segment.seg_addr.pval)) +
reserve);
(IOVBASE_TYPE*)(frag->segment.base.seg_addr.lval + reserve);
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
if( OPAL_UNLIKELY(rc < 0) ) {
MCA_BTL_SMCUDA_FRAG_RETURN(frag);
return NULL;
}
frag->segment.seg_len = reserve + max_data;
frag->segment.base.seg_len = reserve + max_data;
#if OMPI_CUDA_SUPPORT
} else {
/* Normally, we are here because we have a GPU buffer and we are preparing
@ -745,9 +745,9 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
MCA_BTL_SMCUDA_FRAG_RETURN(frag);
return NULL;
}
frag->segment.seg_addr.pval = iov.iov_base;
frag->segment.seg_len = max_data;
memcpy(frag->segment.seg_key.cudakey, ((mca_mpool_common_cuda_reg_t *)registration)->memHandle,
frag->segment.base.seg_addr.pval = iov.iov_base;
frag->segment.base.seg_len = max_data;
memcpy(frag->segment.key, ((mca_mpool_common_cuda_reg_t *)registration)->memHandle,
sizeof(((mca_mpool_common_cuda_reg_t *)registration)->memHandle) +
sizeof(((mca_mpool_common_cuda_reg_t *)registration)->evtHandle));
frag->segment.memh_seg_addr.pval = registration->base;
@ -755,7 +755,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
}
#endif /* OMPI_CUDA_SUPPORT */
frag->base.des_src = &(frag->segment);
frag->base.des_src = &(frag->segment.base);
frag->base.des_src_cnt = 1;
frag->base.order = MCA_BTL_NO_ORDER;
frag->base.des_dst = NULL;
@ -766,10 +766,10 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
}
#if 0
#define MCA_BTL_SMCUDA_TOUCH_DATA_TILL_CACHELINE_BOUNDARY(sm_frag) \
#define MCA_BTL_SMCUDA_TOUCH_DATA_TILL_CACHELINE_BOUNDARY(sm_frag) \
do { \
char* _memory = (char*)(sm_frag)->segment.seg_addr.pval + \
(sm_frag)->segment.seg_len; \
char* _memory = (char*)(sm_frag)->segment.base.seg_addr.pval + \
(sm_frag)->segment.base.seg_len; \
int* _intmem; \
size_t align = (intptr_t)_memory & 0xFUL; \
switch( align & 0x3 ) { \
@ -834,7 +834,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
}
/* fill in fragment fields */
frag->segment.seg_len = length;
frag->segment.base.seg_len = length;
frag->hdr->len = length;
assert( 0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) );
frag->base.des_flags = flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP; /* why do any flags matter here other than OWNERSHIP? */
@ -842,7 +842,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
frag->endpoint = endpoint;
/* write the match header (with MPI comm/tag/etc. info) */
memcpy( frag->segment.seg_addr.pval, header, header_size );
memcpy( frag->segment.base.seg_addr.pval, header, header_size );
/* write the message data if there is any */
/*
@ -853,7 +853,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
struct iovec iov;
uint32_t iov_count;
/* pack the data into the supplied buffer */
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)frag->segment.seg_addr.pval + header_size);
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)frag->segment.base.seg_addr.pval + header_size);
iov.iov_len = max_data = payload_size;
iov_count = 1;
@ -901,7 +901,7 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
}
/* available header space */
frag->hdr->len = frag->segment.seg_len;
frag->hdr->len = frag->segment.base.seg_len;
/* type of message, pt-2-pt, one-sided, etc */
frag->hdr->tag = tag;
@ -949,12 +949,12 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
return NULL;
}
frag->segment.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.seg_addr.pval) );
frag->segment.base.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.base.seg_addr.pval) );
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->base.des_dst = &frag->segment;
frag->base.des_dst = &frag->segment.base;
frag->base.des_dst_cnt = 1;
frag->base.des_flags = flags;
return &frag->base;
@ -967,6 +967,8 @@ int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor)
{
mca_btl_smcuda_segment_t *src_seg = (mca_btl_smcuda_segment_t *) descriptor->des_src;
mca_btl_smcuda_segment_t *dst_seg = (mca_btl_smcuda_segment_t *) descriptor->des_dst;
mca_mpool_common_cuda_reg_t rget_reg;
mca_mpool_common_cuda_reg_t *reg_ptr = &rget_reg;
int btl_ownership;
@ -980,8 +982,7 @@ int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
* garbage in the debugger. */
memset(&rget_reg, 0, sizeof(rget_reg));
memcpy(&rget_reg.memHandle, descriptor->des_src->seg_key.cudakey,
sizeof(descriptor->des_src->seg_key.cudakey));
memcpy(&rget_reg.memHandle, src_seg->key, sizeof(src_seg->key));
/* Open the memory handle to the remote memory. If it is cached, then
* we just retrieve it from cache and avoid a call to open the handle. That
@ -990,8 +991,8 @@ int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
* remote memory which may lie somewhere in the middle. This is taken care of
* a few lines down. Note that we hand in the peer rank just for debugging
* support. */
rc = ep->mpool->mpool_register(ep->mpool, descriptor->des_src->memh_seg_addr.pval,
descriptor->des_src->memh_seg_len, ep->peer_smp_rank,
rc = ep->mpool->mpool_register(ep->mpool, src_seg->memh_seg_addr.pval,
src_seg->memh_seg_len, ep->peer_smp_rank,
(mca_mpool_base_registration_t **)&reg_ptr);
if (OMPI_SUCCESS != rc) {
@ -1006,7 +1007,7 @@ int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
* not equal the address that was used to retrieve the block.
* Therefore, compute the offset and add it to the address of the
* memory handle. */
offset = (unsigned char *)descriptor->des_src->seg_addr.pval - reg_ptr->base.base;
offset = (unsigned char *)src_seg->base.seg_addr.lval - reg_ptr->base.base;
remote_memory_address = (unsigned char *)reg_ptr->base.alloc_base + offset;
if (0 != offset) {
opal_output(-1, "OFFSET=%d", (int)offset);
@ -1019,8 +1020,8 @@ int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
* rget_reg, not reg_ptr, as we do not cache the event. */
mca_common_wait_stream_synchronize(&rget_reg);
rc = mca_common_cuda_memcpy(descriptor->des_dst->seg_addr.pval, remote_memory_address,
descriptor->des_dst->seg_len, "mca_btl_smcuda_get",
rc = mca_common_cuda_memcpy(dst_seg->base.seg_addr.pval, remote_memory_address,
dst_seg->base.seg_len, "mca_btl_smcuda_get",
(mca_btl_base_descriptor_t *)frag, &done);
if (OMPI_SUCCESS != rc) {
/* Out of resources can be handled by upper layers. */

Просмотреть файл

@ -171,6 +171,7 @@ static int smcuda_register(void)
#if OMPI_CUDA_SUPPORT
mca_btl_smcuda.super.btl_flags |= MCA_BTL_FLAGS_CUDA_GET;
#endif /* OMPI_CUDA_SUPPORT */
mca_btl_smcuda.super.btl_seg_size = sizeof (mca_btl_smcuda_segment_t);
mca_btl_smcuda.super.btl_bandwidth = 9000; /* Mbs */
mca_btl_smcuda.super.btl_latency = 1; /* Microsecs */
@ -412,6 +413,7 @@ void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep)
int mca_btl_smcuda_component_progress(void)
{
/* local variables */
mca_btl_base_segment_t seg;
mca_btl_smcuda_frag_t *frag;
mca_btl_smcuda_frag_t Frag;
sm_fifo_t *fifo = NULL;
@ -474,11 +476,10 @@ int mca_btl_smcuda_component_progress(void)
#endif
/* recv upcall */
reg = mca_btl_base_active_message_trigger + hdr->tag;
Frag.segment.seg_addr.pval = ((char*)hdr) +
sizeof(mca_btl_smcuda_hdr_t);
Frag.segment.seg_len = hdr->len;
seg.seg_addr.pval = ((char*)hdr) + sizeof(mca_btl_smcuda_hdr_t);
seg.seg_len = hdr->len;
Frag.base.des_dst_cnt = 1;
Frag.base.des_dst = &(Frag.segment);
Frag.base.des_dst = &seg;
reg->cbfunc(&mca_btl_smcuda.super, hdr->tag, &(Frag.base),
reg->cbdata);
/* return the fragment */

Просмотреть файл

@ -27,14 +27,14 @@ static inline void mca_btl_smcuda_frag_common_constructor(mca_btl_smcuda_frag_t*
if(frag->hdr != NULL) {
frag->hdr->frag = (mca_btl_smcuda_frag_t*)((uintptr_t)frag |
MCA_BTL_SMCUDA_FRAG_ACK);
frag->segment.seg_addr.pval = ((char*)frag->hdr) +
frag->segment.base.seg_addr.pval = ((char*)frag->hdr) +
sizeof(mca_btl_smcuda_hdr_t);
frag->hdr->my_smp_rank = mca_btl_smcuda_component.my_smp_rank;
}
frag->segment.seg_len = frag->size;
frag->base.des_src = &frag->segment;
frag->segment.base.seg_len = frag->size;
frag->base.des_src = &frag->segment.base;
frag->base.des_src_cnt = 1;
frag->base.des_dst = &frag->segment;
frag->base.des_dst = &frag->segment.base;
frag->base.des_dst_cnt = 1;
frag->base.des_flags = 0;
#if OMPI_CUDA_SUPPORT

Просмотреть файл

@ -46,12 +46,24 @@ struct mca_btl_smcuda_hdr_t {
};
typedef struct mca_btl_smcuda_hdr_t mca_btl_smcuda_hdr_t;
struct mca_btl_smcuda_segment_t {
mca_btl_base_segment_t base;
#if OMPI_CUDA_SUPPORT
uint8_t key[128]; /* 64 bytes for CUDA mem handle, 64 bytes for CUDA event handle */
/** Address of the entire memory handle */
ompi_ptr_t memh_seg_addr;
/** Length in bytes of entire memory handle */
uint32_t memh_seg_len;
#endif
};
typedef struct mca_btl_smcuda_segment_t mca_btl_smcuda_segment_t;
/**
* shared memory send fragment derived type.
*/
struct mca_btl_smcuda_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment;
mca_btl_smcuda_segment_t segment;
struct mca_btl_base_endpoint_t *endpoint;
#if OMPI_CUDA_SUPPORT
struct mca_mpool_base_registration_t *registration;

Просмотреть файл

@ -46,6 +46,7 @@ mca_btl_tcp_module_t mca_btl_tcp_module = {
0, /* latency */
0, /* bandwidth */
0, /* flags */
0, /* segment size */
mca_btl_tcp_add_procs,
mca_btl_tcp_del_procs,
NULL,

Просмотреть файл

@ -262,6 +262,7 @@ static int mca_btl_tcp_component_register(void)
MCA_BTL_FLAGS_NEED_CSUM |
MCA_BTL_FLAGS_NEED_ACK |
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
mca_btl_tcp_module.super.btl_seg_size = sizeof (mca_btl_base_segment_t);
mca_btl_tcp_module.super.btl_bandwidth = 100;
mca_btl_tcp_module.super.btl_latency = 100;

Просмотреть файл

@ -60,6 +60,7 @@ mca_btl_udapl_module_t mca_btl_udapl_module = {
0, /* latency */
0, /* bandwidth */
MCA_BTL_FLAGS_SEND,
0, /* segment size */
mca_btl_udapl_add_procs,
mca_btl_udapl_del_procs,
NULL,
@ -954,7 +955,7 @@ mca_btl_base_descriptor_t* mca_btl_udapl_alloc(
return NULL;
}
frag->segment.seg_len = size;
frag->segment.base.seg_len = size;
/* Set up the LMR triplet from the frag segment.
* Note: The triplet.segment_len is set to what is required for
@ -963,9 +964,9 @@ mca_btl_base_descriptor_t* mca_btl_udapl_alloc(
* triplet.segment_len will have to change.
*/
frag->triplet.virtual_address =
(DAT_VADDR)(uintptr_t)frag->segment.seg_addr.pval;
(DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval;
frag->triplet.segment_length =
frag->segment.seg_len + sizeof(mca_btl_udapl_footer_t);
frag->segment.base.seg_len + sizeof(mca_btl_udapl_footer_t);
assert(frag->triplet.lmr_context ==
frag->registration->lmr_triplet.lmr_context);
@ -1067,8 +1068,8 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_src(
frag->registration = (mca_btl_udapl_reg_t*)registration;
}
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
frag->segment.base.seg_len = max_data;
frag->segment.base.seg_addr.pval = iov.iov_base;
frag->triplet.segment_length = max_data;
frag->triplet.virtual_address = (DAT_VADDR)(uintptr_t)iov.iov_base;
frag->triplet.lmr_context =
@ -1104,7 +1105,7 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_src(
}
iov.iov_len = max_data;
iov.iov_base = (char *) frag->segment.seg_addr.pval + reserve;
iov.iov_base = (char *) frag->segment.base.seg_addr.pval + reserve;
rc = opal_convertor_pack(convertor,
&iov, &iov_count, &max_data );
@ -1116,11 +1117,11 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_src(
*size = max_data;
/* setup lengths and addresses to send out data */
frag->segment.seg_len = max_data + reserve;
frag->segment.base.seg_len = max_data + reserve;
frag->triplet.segment_length =
max_data + reserve + sizeof(mca_btl_udapl_footer_t);
frag->triplet.virtual_address =
(DAT_VADDR)(uintptr_t)frag->segment.seg_addr.pval;
(DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval;
/* initialize base descriptor */
frag->base.des_src = &frag->segment;
@ -1164,16 +1165,16 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_dst(
return NULL;
}
frag->segment.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.seg_addr.pval) );
frag->segment.base.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.base.seg_addr.pval) );
if(NULL == registration) {
/* didn't get a memory registration passed in, so must
* register the region now
*/
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
frag->segment.seg_addr.pval,
frag->segment.seg_len,
frag->segment.base.seg_addr.pval,
frag->segment.base.seg_len,
0,
&registration);
if(OMPI_SUCCESS != rc || NULL == registration) {
@ -1189,8 +1190,7 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_dst(
frag->base.des_dst_cnt = 1;
frag->base.des_flags = flags;
frag->segment.seg_key.key32[0] =
((mca_btl_udapl_reg_t*)registration)->rmr_context;
frag->segment.context = ((mca_btl_udapl_reg_t*)registration)->rmr_context;
frag->base.order = MCA_BTL_NO_ORDER;
@ -1217,7 +1217,7 @@ int mca_btl_udapl_send(
frag->endpoint = endpoint;
frag->ftr = (mca_btl_udapl_footer_t *)
((char *)frag->segment.seg_addr.pval + frag->segment.seg_len);
((char *)frag->segment.base.seg_addr.pval + frag->segment.base.seg_len);
frag->ftr->tag = tag;
frag->type = MCA_BTL_UDAPL_SEND;
@ -1245,7 +1245,7 @@ int mca_btl_udapl_put(
int rc = OMPI_SUCCESS;
mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)des;
mca_btl_base_segment_t *dst_segment = des->des_dst;
mca_btl_udapl_segment_t *dst_segment = des->des_dst;
frag->btl = (mca_btl_udapl_module_t *)btl;
frag->endpoint = endpoint;
@ -1262,43 +1262,42 @@ int mca_btl_udapl_put(
} else {
/* work queue tokens available, try to send */
if(OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], -1) < 0) {
if(OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], -1) < 0) {
OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], 1);
OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], 1);
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
opal_list_append(&endpoint->endpoint_max_frags,
(opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
opal_progress();
} else {
frag->triplet.segment_length = frag->segment.seg_len;
OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], 1);
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
opal_list_append(&endpoint->endpoint_max_frags,
(opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
opal_progress();
} else {
frag->triplet.segment_length = frag->segment.base.seg_len;
remote_buffer.rmr_context =
(DAT_RMR_CONTEXT)dst_segment->seg_key.key32[0];
remote_buffer.target_address =
(DAT_VADDR)(uintptr_t)dst_segment->seg_addr.lval;
remote_buffer.segment_length = dst_segment->seg_len;
remote_buffer.rmr_context = dst_segment->context;
remote_buffer.target_address =
(DAT_VADDR)(uintptr_t)dst_segment->base.seg_addr.lval;
remote_buffer.segment_length = dst_segment->base.seg_len;
cookie.as_ptr = frag;
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
rc = dat_ep_post_rdma_write(endpoint->endpoint_max,
1,
&frag->triplet,
cookie,
&remote_buffer,
DAT_COMPLETION_DEFAULT_FLAG);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(DAT_SUCCESS != rc) {
char* major;
char* minor;
cookie.as_ptr = frag;
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
rc = dat_ep_post_rdma_write(endpoint->endpoint_max,
1,
&frag->triplet,
cookie,
&remote_buffer,
DAT_COMPLETION_DEFAULT_FLAG);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(DAT_SUCCESS != rc) {
char* major;
char* minor;
dat_strerror(rc, (const char**)&major,
(const char**)&minor);
BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_rdma_write",
major, minor));
rc = OMPI_ERROR;
}
}
dat_strerror(rc, (const char**)&major,
(const char**)&minor);
BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_rdma_write",
major, minor));
rc = OMPI_ERROR;
}
}
}
return rc;

Просмотреть файл

@ -214,6 +214,8 @@ int mca_btl_udapl_component_open(void)
sizeof(mca_btl_udapl_frag_eager_rdma_t) +
mca_btl_udapl_component.udapl_eager_frag_size;
mca_btl_udapl_module.super.btl_seg_size = sizeof (mca_btl_udapl_segment_t);
return rc;
}
@ -285,13 +287,13 @@ static void mca_btl_udapl_receive_control(struct mca_btl_base_module_t* btl,
mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)descriptor;
mca_btl_udapl_endpoint_t* endpoint = frag->endpoint;
mca_btl_udapl_control_header_t* ctl_hdr =
frag->segment.seg_addr.pval;
frag->segment.base.seg_addr.pval;
switch (ctl_hdr->type) {
case MCA_BTL_UDAPL_CONTROL_RDMA_CONNECT:
{
mca_btl_udapl_eager_rdma_connect_t* rdma_connect =
frag->segment.seg_addr.pval;
frag->segment.base.seg_addr.pval;
if (endpoint->endpoint_eager_rdma_remote.base.pval) {
BTL_ERROR(("ERROR: Received RDMA connect twice!"));
@ -309,7 +311,7 @@ static void mca_btl_udapl_receive_control(struct mca_btl_base_module_t* btl,
case MCA_BTL_UDAPL_CONTROL_RDMA_CREDIT:
{
mca_btl_udapl_eager_rdma_credit_t* rdma_credit =
frag->segment.seg_addr.pval;
frag->segment.base.seg_addr.pval;
/* don't return credits used for rdma credit control message */
OPAL_THREAD_ADD32(
@ -324,7 +326,7 @@ static void mca_btl_udapl_receive_control(struct mca_btl_base_module_t* btl,
case MCA_BTL_UDAPL_CONTROL_SR_CREDIT:
{
mca_btl_udapl_sr_credit_t* sr_credit =
frag->segment.seg_addr.pval;
frag->segment.base.seg_addr.pval;
/* don't return credits used for sr credit control message */
OPAL_THREAD_ADD32(
@ -717,9 +719,9 @@ static inline int mca_btl_udapl_sendrecv(mca_btl_udapl_module_t* btl,
flags);
cookie.as_ptr = frag;
memcpy(frag->segment.seg_addr.pval,
memcpy(frag->segment.base.seg_addr.pval,
&btl->udapl_addr, sizeof(mca_btl_udapl_addr_t));
memcpy((char *)frag->segment.seg_addr.pval + sizeof(mca_btl_udapl_addr_t),
memcpy((char *)frag->segment.base.seg_addr.pval + sizeof(mca_btl_udapl_addr_t),
&connection_seq, sizeof(int32_t));
connection_seq++;
@ -947,12 +949,12 @@ int mca_btl_udapl_component_progress()
assert(frag->base.des_src_cnt == 0);
assert(frag->type == MCA_BTL_UDAPL_RECV);
assert(frag->triplet.virtual_address ==
(DAT_VADDR)(uintptr_t)frag->segment.seg_addr.pval);
(DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval);
assert(frag->triplet.segment_length == frag->size);
assert(frag->btl == btl);
/* setup frag ftr location and do callback */
frag->segment.seg_len = dto->transfered_length -
frag->segment.base.seg_len = dto->transfered_length -
sizeof(mca_btl_udapl_footer_t);
frag->ftr = (mca_btl_udapl_footer_t *)
((char *)frag->segment.seg_addr.pval +

Просмотреть файл

@ -38,7 +38,7 @@ static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag)
#endif
frag->registration = reg;
frag->segment.seg_addr.pval = (unsigned char*)frag->base.super.ptr;
frag->segment.base.seg_addr.pval = (unsigned char*)frag->base.super.ptr;
frag->ftr = NULL;
/* Don't understand why yet, but there are cases where reg is NULL -
@ -52,14 +52,14 @@ static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag)
static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag)
{
frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit;
frag->segment.base.seg_len = mca_btl_udapl_module.super.btl_eager_limit;
frag->size = mca_btl_udapl_component.udapl_eager_frag_size;
mca_btl_udapl_frag_common_constructor(frag);
}
static void mca_btl_udapl_frag_max_constructor(mca_btl_udapl_frag_t* frag)
{
frag->segment.seg_len = mca_btl_udapl_module.super.btl_max_send_size;
frag->segment.base.seg_len = mca_btl_udapl_module.super.btl_max_send_size;
frag->size = mca_btl_udapl_component.udapl_max_frag_size;
mca_btl_udapl_frag_common_constructor(frag);
}
@ -67,8 +67,8 @@ static void mca_btl_udapl_frag_max_constructor(mca_btl_udapl_frag_t* frag)
static void mca_btl_udapl_frag_user_constructor(mca_btl_udapl_frag_t* frag)
{
mca_btl_udapl_frag_common_constructor(frag);
frag->segment.seg_len = 0;
frag->segment.seg_addr.pval = NULL;
frag->segment.base.seg_len = 0;
frag->segment.base.seg_addr.pval = NULL;
frag->ftr = NULL;
frag->size = 0;
frag->registration = NULL;
@ -77,10 +77,10 @@ static void mca_btl_udapl_frag_user_constructor(mca_btl_udapl_frag_t* frag)
static void mca_btl_udapl_frag_eager_rdma_constructor(mca_btl_udapl_frag_t* frag)
{
mca_btl_udapl_frag_eager_constructor(frag);
frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit;
frag->segment.base.seg_len = mca_btl_udapl_module.super.btl_eager_limit;
frag->size = mca_btl_udapl_component.udapl_eager_frag_size;
frag->rdma_ftr = (mca_btl_udapl_rdma_footer_t *)
((char *)(frag->segment.seg_addr.pval) +
((char *)(frag->segment.base.seg_addr.pval) +
frag->size -
sizeof(mca_btl_udapl_rdma_footer_t));
frag->rdma_ftr->active=0;
@ -92,8 +92,8 @@ static void mca_btl_udapl_frag_common_destructor(mca_btl_udapl_frag_t* frag)
frag->ftr = NULL;
frag->size = 0;
frag->registration = NULL;
frag->segment.seg_len = 0;
frag->segment.seg_addr.pval = NULL;
frag->segment.base.seg_len = 0;
frag->segment.base.seg_addr.pval = NULL;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;

Просмотреть файл

@ -79,12 +79,18 @@ struct mca_btl_udapl_rdma_footer_t {
};
typedef struct mca_btl_udapl_rdma_footer_t mca_btl_udapl_rdma_footer_t;
struct mca_btl_udapl_segment_t {
mca_btl_base_segment_t base;
DAT_RMR_CONTEXT context;
};
typedef struct mca_btl_udapl_segment_t mca_btl_udapl_segment_t;
/**
* uDAPL fragment derived type.
*/
struct mca_btl_udapl_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment;
mca_btl_udapl_segment_t segment;
struct mca_btl_udapl_module_t* btl;
struct mca_btl_base_endpoint_t* endpoint;

Просмотреть файл

@ -112,7 +112,7 @@ btl_ugni_component_register(void)
mca_btl_ugni_component.ugni_get_limit =
mca_btl_ugni_param_register_int("get_limit", "Maximum size message that "
"will be sent using a get protocol "
"(default 4M)", 4 * 1024 * 1024);
"(default 1M)", 1 * 1024 * 1024);
mca_btl_ugni_component.rdma_max_retries =
mca_btl_ugni_param_register_int("rdma_max_retries", NULL, 16);
@ -142,6 +142,8 @@ btl_ugni_component_register(void)
mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_ugni_module.super.btl_seg_size = sizeof (mca_btl_ugni_segment_t);
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */
@ -427,7 +429,7 @@ mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module)
return 0;
}
BTL_VERBOSE(("RDMA/FMA complete for frag %p", frag));
BTL_VERBOSE(("RDMA/FMA complete for frag %p", (void *) frag));
frag->cbfunc (frag, ompi_common_rc_ugni_to_ompi (rc));
@ -462,9 +464,12 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
(mca_btl_base_endpoint_t *) opal_list_remove_first (&ugni_module->ep_wait_list);
assert (NULL != endpoint);
endpoint->wait_listed = false;
rc = mca_btl_progress_send_wait_list (endpoint);
if (OMPI_SUCCESS != rc) {
if (OMPI_SUCCESS != rc && false == endpoint->wait_listed) {
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
endpoint->wait_listed = true;
}
}

Просмотреть файл

@ -42,6 +42,7 @@ typedef struct mca_btl_base_endpoint_t {
struct mca_btl_ugni_smsg_mbox_t *mailbox;
opal_list_t frag_wait_list;
bool wait_listed;
int32_t smsg_progressing;
} mca_btl_base_endpoint_t;

Просмотреть файл

@ -16,7 +16,7 @@
static inline void mca_btl_ugni_base_frag_constructor (mca_btl_ugni_base_frag_t *frag)
{
memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base));
frag->segments[0].seg_addr.pval = frag->base.super.ptr;
frag->segments[0].base.seg_addr.pval = frag->base.super.ptr;
}
static inline void mca_btl_ugni_eager_frag_constructor (mca_btl_ugni_base_frag_t *frag)
@ -25,8 +25,8 @@ static inline void mca_btl_ugni_eager_frag_constructor (mca_btl_ugni_base_frag_t
(struct mca_btl_ugni_reg_t *) frag->base.super.registration;
memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base));
frag->segments[0].seg_addr.pval = frag->base.super.ptr;
memmove (frag->segments[0].seg_key.key64, &reg->memory_hdl, sizeof (reg->memory_hdl));
frag->segments[0].base.seg_addr.pval = frag->base.super.ptr;
frag->segments[0].memory_handle = reg->memory_hdl;
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_frag_t, mca_btl_base_descriptor_t,

Просмотреть файл

@ -61,11 +61,18 @@ struct mca_btl_ugni_base_frag_t;
typedef void (*frag_cb_t) (struct mca_btl_ugni_base_frag_t *, int);
typedef struct mca_btl_ugni_segment_t {
mca_btl_base_segment_t base;
gni_mem_handle_t memory_handle;
uint8_t extra_bytes[3];
uint8_t extra_byte_count;
} mca_btl_ugni_segment_t;
typedef struct mca_btl_ugni_base_frag_t {
mca_btl_base_descriptor_t base;
size_t hdr_size;
mca_btl_ugni_frag_hdr_t hdr;
mca_btl_base_segment_t segments[2];
mca_btl_ugni_segment_t segments[2];
ompi_common_ugni_post_desc_t post_desc;
mca_btl_base_endpoint_t *endpoint;
mca_btl_ugni_reg_t *registration;

Просмотреть файл

@ -24,7 +24,9 @@ int mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_btl_base_descriptor_t *des) {
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
size_t size = des->des_src->seg_len;
mca_btl_ugni_segment_t *src_seg = (mca_btl_ugni_segment_t *) des->des_src;
mca_btl_ugni_segment_t *dst_seg = (mca_btl_ugni_segment_t *) des->des_dst;
size_t size = src_seg->base.seg_len - src_seg->extra_byte_count;
bool check;
BTL_VERBOSE(("Using RDMA/FMA Get"));
@ -40,9 +42,15 @@ int mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
return OMPI_ERR_NOT_AVAILABLE;
}
if (src_seg->extra_byte_count) {
memmove (dst_seg->base.seg_addr.pval + size, src_seg->extra_bytes, src_seg->extra_byte_count);
src_seg->base.seg_len = size;
dst_seg->base.seg_len = size;
}
des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
return mca_btl_ugni_post (frag, true, des->des_dst, des->des_src);
return mca_btl_ugni_post (frag, true, dst_seg, src_seg);
}
static void mca_btl_ugni_frag_set_ownership (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
@ -82,23 +90,24 @@ static void mca_btl_ugni_callback_eager_get (mca_btl_base_module_t *btl, mca_btl
size_t payload_len = frag->hdr.eager.src_seg.seg_len;
size_t hdr_len = len - payload_len;
mca_btl_active_message_callback_t *reg;
mca_btl_base_segment_t segs[2];
mca_btl_ugni_base_frag_t tmp;
BTL_VERBOSE(("eager get for rem_ctx %p complete", frag->hdr.eager.ctx));
tmp.base.des_dst = tmp.segments;
tmp.base.des_dst = segs;
if (hdr_len) {
tmp.base.des_dst_cnt = 2;
tmp.segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
tmp.segments[0].seg_len = hdr_len;
tmp.segments[1].seg_addr.pval = frag->segments[0].seg_addr.pval;
tmp.segments[1].seg_len = payload_len;
segs[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
segs[0].seg_len = hdr_len;
segs[1].seg_addr.pval = frag->segments[0].base.seg_addr.pval;
segs[1].seg_len = payload_len;
} else {
tmp.base.des_dst_cnt = 1;
tmp.segments[0].seg_addr.pval = frag->segments[0].seg_addr.pval;
tmp.segments[0].seg_len = payload_len;
segs[0].seg_addr.pval = frag->segments[0].base.seg_addr.pval;
segs[0].seg_len = payload_len;
}
reg = mca_btl_base_active_message_trigger + tag;
@ -139,18 +148,13 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get;
frag->base.des_flags = MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.des_dst = frag->segments;
frag->base.des_dst_cnt = 1;
frag->segments[1] = hdr.eager.src_seg;
frag->base.des_src = frag->segments + 1;
frag->base.des_src_cnt = 1;
frag->segments[1].base = hdr.eager.src_seg;
/* increase size to a multiple of 4 bytes (required for get) */
frag->segments[0].seg_len = frag->segments[1].seg_len =
frag->segments[0].base.seg_len = frag->segments[1].base.seg_len =
(hdr.eager.src_seg.seg_len + 3) & ~3;
rc = mca_btl_ugni_post (frag, true, frag->base.des_dst, frag->base.des_src);
rc = mca_btl_ugni_post (frag, true, frag->segments, frag->segments + 1);
if (OPAL_UNLIKELY(OMPI_SUCCESS == rc)) {
return OMPI_SUCCESS;
}

Просмотреть файл

@ -54,6 +54,7 @@ mca_btl_ugni_module_t mca_btl_ugni_module = {
/* .btl_latency = */ 0,
/* .btl_bandwidth = */ 0,
/* .btl_flags = */ 0,
/* .btl_seg_size = */ 0,
/* member functions */
mca_btl_ugni_add_procs,
@ -215,15 +216,15 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
frag->base.des_flags = flags;
frag->base.order = order;
frag->base.des_src = frag->segments + 1;
frag->base.des_src = &frag->segments[1].base;
frag->base.des_src_cnt = 1;
frag->base.des_dst = frag->segments + 1;
frag->base.des_dst = &frag->segments[1].base;
frag->base.des_dst_cnt = 1;
frag->segments[0].seg_addr.pval = NULL;
frag->segments[0].seg_len = 0;
frag->segments[1].seg_addr.pval = frag->base.super.ptr;
frag->segments[1].seg_len = size;
frag->segments[0].base.seg_addr.pval = NULL;
frag->segments[0].base.seg_len = 0;
frag->segments[1].base.seg_addr.pval = frag->base.super.ptr;
frag->segments[1].base.seg_len = size;
frag->flags = MCA_BTL_UGNI_FRAG_BUFFERED;
if (size > mca_btl_ugni_component.smsg_max_data) {
@ -234,9 +235,7 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
registration = (mca_btl_ugni_reg_t *) frag->base.super.registration;
memcpy ((void *) frag->segments[1].seg_key.key64,
(void *)&registration->memory_hdl,
sizeof (registration->memory_hdl));
frag->segments[1].memory_handle = registration->memory_hdl;
} else {
frag->hdr_size = sizeof (frag->hdr.send);
}
@ -299,14 +298,11 @@ mca_btl_ugni_prepare_dst (mca_btl_base_module_t *btl,
frag->registration = (mca_btl_ugni_reg_t*) registration;
}
memcpy ((void *) frag->segments[0].seg_key.key64,
(void *)&((mca_btl_ugni_reg_t *)registration)->memory_hdl,
sizeof (((mca_btl_ugni_reg_t *)registration)->memory_hdl));
frag->segments[0].memory_handle = ((mca_btl_ugni_reg_t *)registration)->memory_hdl;
frag->segments[0].base.seg_len = *size;
frag->segments[0].base.seg_addr.pval = data_ptr;
frag->segments[0].seg_len = *size;
frag->segments[0].seg_addr.pval = data_ptr;
frag->base.des_dst = frag->segments;
frag->base.des_dst = &frag->segments->base;
frag->base.des_dst_cnt = 1;
frag->base.order = order;
frag->base.des_flags = flags;

Просмотреть файл

@ -20,10 +20,10 @@
static inline struct mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags)
mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags)
{
bool use_eager_get = (*size + reserve) > mca_btl_ugni_component.smsg_max_data;
mca_btl_ugni_base_frag_t *frag = NULL;
@ -53,23 +53,21 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
frag->flags = MCA_BTL_UGNI_FRAG_EAGER | MCA_BTL_UGNI_FRAG_IGNORE;
frag->registration = registration;
memcpy ((void *) frag->segments[1].seg_key.key64,
(void *)&registration->memory_hdl,
sizeof (registration->memory_hdl));
frag->segments[1].memory_handle = registration->memory_hdl;
frag->hdr_size = reserve + sizeof (frag->hdr.eager);
frag->segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
frag->segments[0].base.seg_addr.pval = frag->hdr.eager_ex.pml_header;
} else {
frag->hdr_size = reserve + sizeof (frag->hdr.send);
frag->segments[0].seg_addr.pval = frag->hdr.send_ex.pml_header;
frag->segments[0].base.seg_addr.pval = frag->hdr.send_ex.pml_header;
}
frag->segments[0].seg_len = reserve;
frag->segments[0].base.seg_len = reserve;
frag->segments[1].seg_addr.pval = data_ptr;
frag->segments[1].seg_len = *size;
frag->segments[1].base.seg_addr.pval = data_ptr;
frag->segments[1].base.seg_len = *size;
frag->base.des_src = frag->segments;
frag->base.des_src = &frag->segments->base;
frag->base.des_src_cnt = 2;
frag->base.order = order;
frag->base.des_flags = flags;
@ -102,12 +100,10 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
registration = (mca_btl_ugni_reg_t *) frag->base.super.registration;
memcpy ((void *) frag->segments[1].seg_key.key64,
(void *)&registration->memory_hdl,
sizeof (registration->memory_hdl));
frag->segments[1].memory_handle = registration->memory_hdl;
frag->hdr_size = reserve + sizeof (frag->hdr.eager);
frag->segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
frag->segments[0].base.seg_addr.pval = frag->hdr.eager_ex.pml_header;
} else {
(void) MCA_BTL_UGNI_FRAG_ALLOC_SMSG(endpoint, frag);
if (OPAL_UNLIKELY(NULL == frag)) {
@ -115,7 +111,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
}
frag->hdr_size = reserve + sizeof (frag->hdr.send);
frag->segments[0].seg_addr.pval = frag->hdr.send_ex.pml_header;
frag->segments[0].base.seg_addr.pval = frag->hdr.send_ex.pml_header;
}
frag->flags |= MCA_BTL_UGNI_FRAG_BUFFERED;
@ -131,12 +127,12 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
}
}
frag->segments[0].seg_len = reserve;
frag->segments[0].base.seg_len = reserve;
frag->segments[1].seg_addr.pval = frag->base.super.ptr;
frag->segments[1].seg_len = *size;
frag->segments[1].base.seg_addr.pval = frag->base.super.ptr;
frag->segments[1].base.seg_len = *size;
frag->base.des_src = frag->segments;
frag->base.des_src = &frag->segments->base;
frag->base.des_src_cnt = 2;
frag->base.order = order;
frag->base.des_flags = flags;
@ -146,10 +142,10 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
static inline struct mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_src_send (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags)
mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags)
{
bool use_eager_get = (*size + reserve) > mca_btl_ugni_component.smsg_max_data;
bool send_in_place;
@ -171,11 +167,11 @@ mca_btl_ugni_prepare_src_send (struct mca_btl_base_module_t *btl,
static inline struct mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_src_rdma (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor,
uint8_t order, size_t *size,
uint32_t flags)
struct opal_convertor_t *convertor,
uint8_t order, size_t *size,
uint32_t flags)
{
mca_btl_ugni_base_frag_t *frag;
void *data_ptr;
@ -206,18 +202,24 @@ mca_btl_ugni_prepare_src_rdma (struct mca_btl_base_module_t *btl,
frag->registration = (mca_btl_ugni_reg_t *) registration;
}
memcpy ((void *) frag->segments[0].seg_key.key64,
(void *)&((mca_btl_ugni_reg_t *)registration)->memory_hdl,
sizeof (((mca_btl_ugni_reg_t *)registration)->memory_hdl));
frag->segments[0].memory_handle = ((mca_btl_ugni_reg_t *)registration)->memory_hdl;
} else {
memset ((void *) frag->segments[0].seg_key.key64, 0,
sizeof (frag->segments[0].seg_key.key64));
memset ((void *) &frag->segments[0].memory_handle, 0,
sizeof (frag->segments[0].memory_handle));
}
frag->segments[0].seg_addr.pval = data_ptr;
frag->segments[0].seg_len = *size;
if ((flags & MCA_BTL_DES_FLAGS_GET) && (*size & 0x3)) {
memmove (frag->segments[0].extra_bytes, (char *) data_ptr + (*size & ~0x3),
*size & 0x3);
frag->segments[0].extra_byte_count = *size & 0x3;
} else {
frag->segments[0].extra_byte_count = 0;
}
frag->base.des_src = frag->segments;
frag->segments[0].base.seg_addr.pval = data_ptr;
frag->segments[0].base.seg_len = *size;
frag->base.des_src = &frag->segments->base;
frag->base.des_src_cnt = 1;
frag->base.order = order;
frag->base.des_flags = flags;

Просмотреть файл

@ -33,5 +33,6 @@ int mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
return mca_btl_ugni_post (frag, false, des->des_src, des->des_dst);
return mca_btl_ugni_post (frag, false, (mca_btl_ugni_segment_t *) des->des_src,
(mca_btl_ugni_segment_t *) des->des_dst);
}

Просмотреть файл

@ -23,18 +23,18 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
static inline void init_gni_post_desc (mca_btl_ugni_base_frag_t *frag,
gni_post_type_t op_type,
uint64_t lcl_addr,
gni_mem_handle_t *lcl_mdh,
gni_mem_handle_t lcl_mdh,
uint64_t rem_addr,
gni_mem_handle_t *rem_mdh,
gni_mem_handle_t rem_mdh,
uint64_t bufsize,
gni_cq_handle_t cq_hndl) {
frag->post_desc.base.type = op_type;
frag->post_desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
frag->post_desc.base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
frag->post_desc.base.local_addr = (uint64_t) lcl_addr;
frag->post_desc.base.local_mem_hndl = *lcl_mdh;
frag->post_desc.base.local_mem_hndl = lcl_mdh;
frag->post_desc.base.remote_addr = (uint64_t) rem_addr;
frag->post_desc.base.remote_mem_hndl = *rem_mdh;
frag->post_desc.base.remote_mem_hndl = rem_mdh;
frag->post_desc.base.length = bufsize;
frag->post_desc.base.rdma_mode = 0;
frag->post_desc.base.src_cq_hndl = cq_hndl;
@ -42,15 +42,13 @@ static inline void init_gni_post_desc (mca_btl_ugni_base_frag_t *frag,
}
static inline int mca_btl_ugni_post_fma (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
mca_btl_base_segment_t *lcl_seg, mca_btl_base_segment_t *rem_seg)
mca_btl_ugni_segment_t *lcl_seg, mca_btl_ugni_segment_t *rem_seg)
{
gni_return_t rc;
/* Post descriptor */
init_gni_post_desc (frag, op_type, lcl_seg->seg_addr.lval,
(gni_mem_handle_t *)&lcl_seg->seg_key.key64,
rem_seg->seg_addr.lval, (gni_mem_handle_t *)&rem_seg->seg_key.key64,
lcl_seg->seg_len, 0); /* CQ is ignored for FMA transactions */
/* Post descriptor (CQ is ignored for FMA transactions) */
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len, 0);
rc = GNI_PostFma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
if (GNI_RC_SUCCESS != rc) {
@ -62,15 +60,14 @@ static inline int mca_btl_ugni_post_fma (mca_btl_ugni_base_frag_t *frag, gni_pos
}
static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
mca_btl_base_segment_t *lcl_seg, mca_btl_base_segment_t *rem_seg)
mca_btl_ugni_segment_t *lcl_seg, mca_btl_ugni_segment_t *rem_seg)
{
gni_return_t rc;
/* Post descriptor */
init_gni_post_desc (frag, op_type, lcl_seg->seg_addr.lval,
(gni_mem_handle_t *)&lcl_seg->seg_key.key64,
rem_seg->seg_addr.lval, (gni_mem_handle_t *)&rem_seg->seg_key.key64,
lcl_seg->seg_len, frag->endpoint->btl->rdma_local_cq);
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
frag->endpoint->btl->rdma_local_cq);
rc = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
if (GNI_RC_SUCCESS != rc) {
@ -81,8 +78,8 @@ static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_pos
return OMPI_SUCCESS;
}
static inline int mca_btl_ugni_post (mca_btl_ugni_base_frag_t *frag, bool get, mca_btl_base_segment_t *lcl_seg,
mca_btl_base_segment_t *rem_seg) {
static inline int mca_btl_ugni_post (mca_btl_ugni_base_frag_t *frag, bool get, mca_btl_ugni_segment_t *lcl_seg,
mca_btl_ugni_segment_t *rem_seg) {
frag->cbfunc = mca_btl_ugni_frag_complete;
if (frag->base.des_src->seg_len <= mca_btl_ugni_component.ugni_fma_limit) {

Просмотреть файл

@ -20,13 +20,13 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag)
{
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) descriptor;
size_t size = frag->segments[0].seg_len + frag->segments[1].seg_len;
size_t size = frag->segments[0].base.seg_len + frag->segments[1].base.seg_len;
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
int flags_save = frag->base.des_flags;
int rc;
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %d", (void *)descriptor,
ORTE_PROC_MY_NAME->vpid, endpoint->common->ep_rem_id, frag->segments[0].seg_len));
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
ORTE_PROC_MY_NAME->vpid, endpoint->common->ep_rem_id, frag->segments[0].base.seg_len));
/* tag and len are at the same location in eager and smsg frag hdrs */
frag->hdr.send.lag = (tag << 24) | size;
@ -70,8 +70,9 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
if (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc)) {
/* queue up request */
if (0 == opal_list_get_size (&endpoint->frag_wait_list)) {
if (false == endpoint->wait_listed) {
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
endpoint->wait_listed = true;
}
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);

Просмотреть файл

@ -50,6 +50,7 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
{
mca_btl_active_message_callback_t *reg;
mca_btl_ugni_base_frag_t frag;
mca_btl_base_segment_t seg;
bool disconnect = false;
uintptr_t data_ptr;
gni_return_t rc;
@ -100,11 +101,11 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
BTL_VERBOSE(("received smsg fragment. hdr = {len = %u, tag = %d}", len, tag));
reg = mca_btl_base_active_message_trigger + tag;
frag.base.des_dst = frag.segments;
frag.base.des_dst = &seg;
frag.base.des_dst_cnt = 1;
frag.segments[0].seg_addr.pval = (void *)((uintptr_t)data_ptr + sizeof (mca_btl_ugni_send_frag_hdr_t));
frag.segments[0].seg_len = len;
seg.seg_addr.pval = (void *)((uintptr_t)data_ptr + sizeof (mca_btl_ugni_send_frag_hdr_t));
seg.seg_len = len;
assert (NULL != reg->cbfunc);
@ -186,7 +187,6 @@ int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
gni_cq_entry_t event_data;
gni_return_t grc;
uint64_t inst_id;
int rc;
grc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
if (GNI_RC_NOT_DONE == grc) {

Просмотреть файл

@ -104,11 +104,12 @@ static inline int mca_btl_ugni_send_frag (struct mca_btl_base_endpoint_t *btl_pe
mca_btl_ugni_base_frag_t *frag) {
if (OPAL_LIKELY(!(frag->flags & MCA_BTL_UGNI_FRAG_EAGER))) {
return ompi_mca_btl_ugni_smsg_send (frag, &frag->hdr.send, frag->hdr_size,
frag->segments[1].seg_addr.pval, frag->segments[1].seg_len,
frag->segments[1].base.seg_addr.pval,
frag->segments[1].base.seg_len,
MCA_BTL_UGNI_TAG_SEND);
}
frag->hdr.eager.src_seg = frag->segments[1];
frag->hdr.eager.src_seg = frag->segments[1].base;
frag->hdr.eager.ctx = (void *) frag;
return ompi_mca_btl_ugni_smsg_send (frag, &frag->hdr.eager, frag->hdr_size,

Просмотреть файл

@ -80,6 +80,7 @@ mca_btl_vader_t mca_btl_vader = {
0, /* bTl_latency */
0, /* btl_bandwidth */
0, /* btl_flags */
0, /* btl segment size */
vader_add_procs,
vader_del_procs,
NULL, /* btl_register */
@ -569,8 +570,8 @@ struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t
opal_convertor_get_current_pointer (convertor, (void **) &data_ptr);
frag->segment.seg_key.key64[0] = (uint64_t)(uintptr_t) data_ptr;
frag->segment.seg_len = *size;
frag->segment.seg_addr.pval = data_ptr;
frag->segment.seg_len = *size;
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
@ -664,8 +665,8 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
return NULL;
}
frag->segment.seg_key.key64[0] = (uint64_t)(uintptr_t) data_ptr;
frag->segment.seg_len = reserve + *size;
frag->segment.seg_addr.pval = data_ptr;
frag->segment.seg_len = reserve + *size;
}
frag->base.des_src = &frag->segment;

Просмотреть файл

@ -140,6 +140,7 @@ static int mca_btl_vader_component_register (void)
mca_btl_vader.super.btl_min_rdma_pipeline_size = mca_btl_vader.super.btl_eager_limit;
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_PUT |
MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_vader.super.btl_seg_size = sizeof (mca_btl_base_segment_t);
mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */
mca_btl_vader.super.btl_latency = 1; /* Microsecs */

Просмотреть файл

@ -33,16 +33,15 @@ int mca_btl_vader_get (struct mca_btl_base_module_t *btl,
mca_mpool_base_registration_t *reg;
void *rem_ptr;
reg = vader_get_registation (endpoint->peer_smp_rank,
(void *)(uintptr_t) src->seg_key.key64[0],
reg = vader_get_registation (endpoint->peer_smp_rank, src->seg_addr.pval,
src->seg_len, 0);
if (OPAL_UNLIKELY(NULL == reg)) {
return OMPI_ERROR;
}
rem_ptr = vader_reg_to_ptr (reg, (void *)(uintptr_t) src->seg_key.key64[0]);
rem_ptr = vader_reg_to_ptr (reg, src->seg_addr.pval);
vader_memmove ((void *)(uintptr_t) dst->seg_key.key64[0], rem_ptr, size);
vader_memmove (dst->seg_addr.pval, rem_ptr, size);
vader_return_registration (reg, endpoint->peer_smp_rank);

Просмотреть файл

@ -33,16 +33,15 @@ int mca_btl_vader_put (struct mca_btl_base_module_t *btl,
mca_mpool_base_registration_t *reg;
void *rem_ptr;
reg = vader_get_registation (endpoint->peer_smp_rank,
(void *)(uintptr_t) dst->seg_key.key64[0],
reg = vader_get_registation (endpoint->peer_smp_rank, dst->seg_addr.pval,
dst->seg_len, 0);
if (OPAL_UNLIKELY(NULL == reg)) {
return OMPI_ERROR;
}
rem_ptr = vader_reg_to_ptr (reg, (void *)(uintptr_t) dst->seg_key.key64[0]);
rem_ptr = vader_reg_to_ptr (reg, dst->seg_addr.pval);
vader_memmove (rem_ptr, (void *)(uintptr_t) src->seg_key.key64[0], size);
vader_memmove (rem_ptr, src->seg_addr.pval, size);
vader_return_registration (reg, endpoint->peer_smp_rank);

Просмотреть файл

@ -75,7 +75,7 @@ typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;
struct ompi_osc_rdma_btl_t {
uint64_t peer_seg_key;
uint8_t peer_seg[MCA_BTL_SEG_MAX_SIZE];
mca_bml_base_btl_t *bml_btl;
int rdma_order;
int32_t num_sent;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
@ -8,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
@ -855,7 +856,8 @@ component_fragment_cb(struct mca_btl_base_module_t *btl,
index = module->m_peer_info[origin].peer_num_btls++;
rdma_btl = &(module->m_peer_info[origin].peer_btls[index]);
rdma_btl->peer_seg_key = header->hdr_segkey;
memmove (rdma_btl->peer_seg, header + 1, bml_btl->btl->btl_seg_size);
rdma_btl->bml_btl = bml_btl;
rdma_btl->rdma_order = MCA_BTL_NO_ORDER;
rdma_btl->num_sent = 0;
@ -982,7 +984,7 @@ struct peer_rdma_send_info_t{
ompi_osc_rdma_module_t *module;
ompi_proc_t *proc;
mca_bml_base_btl_t *bml_btl;
uint64_t seg_key;
void *seg;
};
typedef struct peer_rdma_send_info_t peer_rdma_send_info_t;
OBJ_CLASS_INSTANCE(peer_rdma_send_info_t, opal_list_item_t, NULL, NULL);
@ -1021,10 +1023,13 @@ rdma_send_info_send(ompi_osc_rdma_module_t *module,
mca_bml_base_btl_t *bml_btl = NULL;
mca_btl_base_descriptor_t *descriptor = NULL;
ompi_osc_rdma_rdma_info_header_t *header = NULL;
size_t hdr_size;
bml_btl = peer_send_info->bml_btl;
mca_bml_base_alloc(bml_btl, &descriptor, MCA_BTL_NO_ORDER,
sizeof(ompi_osc_rdma_rdma_info_header_t),
hdr_size = sizeof(ompi_osc_rdma_rdma_info_header_t) + bml_btl->btl->btl_seg_size;
mca_bml_base_alloc(bml_btl, &descriptor, MCA_BTL_NO_ORDER, hdr_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
if (NULL == descriptor) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
@ -1032,7 +1037,7 @@ rdma_send_info_send(ompi_osc_rdma_module_t *module,
}
/* verify at least enough space for header */
if (descriptor->des_src[0].seg_len < sizeof(ompi_osc_rdma_rdma_info_header_t)) {
if (descriptor->des_src[0].seg_len < hdr_size) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
@ -1046,10 +1051,11 @@ rdma_send_info_send(ompi_osc_rdma_module_t *module,
header = (ompi_osc_rdma_rdma_info_header_t*) descriptor->des_src[0].seg_addr.pval;
header->hdr_base.hdr_type = OMPI_OSC_RDMA_HDR_RDMA_INFO;
header->hdr_base.hdr_flags = 0;
header->hdr_segkey = peer_send_info->seg_key;
header->hdr_origin = ompi_comm_rank(module->m_comm);
header->hdr_windx = ompi_comm_get_cid(module->m_comm);
memmove (header + 1, peer_send_info->seg, bml_btl->btl->btl_seg_size);
#ifdef WORDS_BIGENDIAN
header->hdr_base.hdr_flags |= OMPI_OSC_RDMA_HDR_FLAG_NBO;
#elif OPAL_ENABLE_HETEROGENEOUS_SUPPORT
@ -1299,8 +1305,7 @@ setup_rdma(ompi_osc_rdma_module_t *module)
peer_send_info->module = module;
peer_send_info->proc = ompi_comm_peer_lookup(module->m_comm, i);
peer_send_info->bml_btl = peer_info->local_btls[j];
peer_send_info->seg_key =
peer_info->local_descriptors[j]->des_dst[0].seg_key.key64[0];
peer_send_info->seg = (void *) peer_info->local_descriptors[j]->des_dst;
ret = rdma_send_info_send(module, peer_send_info);
if (OMPI_SUCCESS != ret) {

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
@ -7,7 +8,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
@ -186,15 +187,15 @@ ompi_osc_rdma_sendreq_rdma(ompi_osc_rdma_module_t *module,
assert(NULL != descriptor);
descriptor->des_dst = sendreq->remote_segs;
descriptor->des_dst = (mca_btl_base_segment_t *) sendreq->remote_segs;
descriptor->des_dst_cnt = 1;
memmove (descriptor->des_dst, rdma_btl->peer_seg, sizeof (rdma_btl->peer_seg));
descriptor->des_dst[0].seg_addr.lval =
module->m_peer_info[target].peer_base +
((unsigned long)sendreq->req_target_disp * module->m_win->w_disp_unit);
descriptor->des_dst[0].seg_len =
sendreq->req_origin_bytes_packed;
descriptor->des_dst[0].seg_key.key64[0] =
rdma_btl->peer_seg_key;
#if 0
opal_output(0, "putting to %d: 0x%lx(%d), %d, %d",
target, descriptor->des_dst[0].seg_addr.lval,
@ -214,15 +215,15 @@ ompi_osc_rdma_sendreq_rdma(ompi_osc_rdma_module_t *module,
assert(NULL != descriptor);
descriptor->des_src = sendreq->remote_segs;
descriptor->des_src = (mca_btl_base_segment_t *) sendreq->remote_segs;
descriptor->des_src_cnt = 1;
memmove (descriptor->des_src, rdma_btl->peer_seg, sizeof (rdma_btl->peer_seg));
descriptor->des_src[0].seg_addr.lval =
module->m_peer_info[target].peer_base +
((unsigned long)sendreq->req_target_disp * module->m_win->w_disp_unit);
descriptor->des_src[0].seg_len =
sendreq->req_origin_bytes_packed;
descriptor->des_src[0].seg_key.key64[0] =
rdma_btl->peer_seg_key;
descriptor->des_cbdata = sendreq;
descriptor->des_cbfunc = rdma_cb;

Просмотреть файл

@ -149,7 +149,6 @@ struct ompi_osc_rdma_rdma_info_header_t {
ompi_osc_rdma_base_header_t hdr_base;
int16_t hdr_windx;
int32_t hdr_origin;
uint64_t hdr_segkey;
};
typedef struct ompi_osc_rdma_rdma_info_header_t ompi_osc_rdma_rdma_info_header_t;

Просмотреть файл

@ -70,7 +70,7 @@ struct ompi_osc_rdma_sendreq_t {
/** op index on the target */
int req_op_id;
mca_btl_base_segment_t remote_segs[1];
uint8_t remote_segs[MCA_BTL_SEG_MAX_SIZE];
};
typedef struct ompi_osc_rdma_sendreq_t ompi_osc_rdma_sendreq_t;
OBJ_CLASS_DECLARATION(ompi_osc_rdma_sendreq_t);

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -285,15 +286,30 @@ void mca_pml_bfo_process_pending_rdma(void);
/*
* Compute the total number of bytes on supplied descriptor
*/
#define MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH(segments, count, hdrlen, length) \
do { \
size_t i; \
\
for( i = 0; i < count; i++ ) { \
length += segments[i].seg_len; \
} \
length -= hdrlen; \
} while(0)
static inline int mca_pml_bfo_compute_segment_length (size_t seg_size, void *segments, size_t count,
size_t hdrlen) {
size_t i, length;
for (i = 0, length = -hdrlen ; i < count ; ++i) {
mca_btl_base_segment_t *segment =
(mca_btl_base_segment_t *)((char *) segments + i * seg_size);
length += segment->seg_len;
}
return length;
}
static inline int mca_pml_bfo_compute_segment_length_base (mca_btl_base_segment_t *segments,
size_t count, size_t hdrlen) {
size_t i, length;
for (i = 0, length = -hdrlen ; i < count ; ++i) {
length += segments[i].seg_len;
}
return length;
}
/* represent BTL chosen for sending request */
struct mca_pml_bfo_com_btl_t {

Просмотреть файл

@ -170,7 +170,6 @@ struct mca_pml_bfo_rget_hdr_t {
uint8_t hdr_padding[4];
#endif
ompi_ptr_t hdr_des; /**< source descriptor */
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
};
typedef struct mca_pml_bfo_rget_hdr_t mca_pml_bfo_rget_hdr_t;

Просмотреть файл

@ -42,7 +42,7 @@ struct mca_pml_bfo_rdma_frag_t {
mca_pml_bfo_hdr_t rdma_hdr;
mca_pml_bfo_rdma_state_t rdma_state;
size_t rdma_length;
mca_btl_base_segment_t rdma_segs[MCA_BTL_DES_MAX_SEGMENTS];
uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS];
void *rdma_req;
struct mca_bml_base_endpoint_t* rdma_ep;
opal_convertor_t convertor;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -200,8 +201,9 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
size_t bytes_received = 0;
if( OPAL_LIKELY(status == OMPI_SUCCESS) ) {
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( des->des_dst, des->des_dst_cnt,
0, bytes_received );
bytes_received = mca_pml_bfo_compute_segment_length (btl->btl_seg_size,
(void *) des->des_dst,
des->des_dst_cnt, 0);
}
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
@ -423,7 +425,7 @@ int mca_pml_bfo_recv_request_get_frag( mca_pml_bfo_rdma_frag_t* frag )
return OMPI_ERR_OUT_OF_RESOURCE;
}
descriptor->des_src = frag->rdma_segs;
descriptor->des_src = (mca_btl_base_segment_t *) frag->rdma_segs;
descriptor->des_src_cnt = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
descriptor->des_cbfunc = mca_pml_bfo_rget_completion;
descriptor->des_cbdata = frag;
@ -470,13 +472,12 @@ void mca_pml_bfo_recv_request_progress_frag( mca_pml_bfo_recv_request_t* recvreq
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0, data_offset = 0;
size_t bytes_received, data_offset = 0;
size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_BFO_RECV_REQUEST_UNPACK */
mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval;
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
bytes_received -= sizeof(mca_pml_bfo_frag_hdr_t);
bytes_received = mca_pml_bfo_compute_segment_length_base (segments, num_segments,
sizeof(mca_pml_bfo_frag_hdr_t));
data_offset = hdr->hdr_frag.hdr_frag_offset;
/*
* Make user buffer accessable(defined) before unpacking.
@ -523,15 +524,12 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0;
mca_pml_bfo_rget_hdr_t* hdr = (mca_pml_bfo_rget_hdr_t*)segments->seg_addr.pval;
mca_bml_base_endpoint_t* bml_endpoint = NULL;
mca_pml_bfo_rdma_frag_t* frag;
size_t i, size = 0;
int rc;
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
#if PML_BFO
@ -565,17 +563,22 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
/* lookup bml datastructures */
bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_bml;
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
/* allocate/initialize a fragment */
memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
for(i = 0; i < hdr->hdr_seg_cnt; i++) {
frag->rdma_segs[i] = hdr->hdr_segs[i];
mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *)(frag->rdma_segs + i * btl->btl_seg_size);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if ((recvreq->req_recv.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
(ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
size += opal_swap_bytes4(hdr->hdr_segs[i].seg_len);
size += opal_swap_bytes4(seg->seg_len);
} else
#endif
{
size += hdr->hdr_segs[i].seg_len;
size += seg->seg_len;
}
}
#if PML_BFO
@ -626,15 +629,14 @@ void mca_pml_bfo_recv_request_progress_rndv( mca_pml_bfo_recv_request_t* recvreq
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0;
size_t bytes_received;
size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_BFO_RECV_REQUEST_UNPACK */
size_t data_offset = 0;
mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval;
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
bytes_received -= sizeof(mca_pml_bfo_rendezvous_hdr_t);
bytes_received = mca_pml_bfo_compute_segment_length_base (segments, num_segments,
sizeof(mca_pml_bfo_rendezvous_hdr_t));
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req;
recvreq->req_rdma_offset = bytes_received;
@ -684,13 +686,13 @@ void mca_pml_bfo_recv_request_progress_match( mca_pml_bfo_recv_request_t* recvre
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0, data_offset = 0;
size_t bytes_received, data_offset = 0;
size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_BFO_RECV_REQUEST_UNPACK */
mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval;
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
bytes_received -= OMPI_PML_BFO_MATCH_HDR_LEN;
bytes_received = mca_pml_bfo_compute_segment_length_base (segments, num_segments,
OMPI_PML_BFO_MATCH_HDR_LEN);
recvreq->req_recv.req_bytes_packed = bytes_received;
MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match);
@ -743,15 +745,11 @@ void mca_pml_bfo_recv_request_matched_probe( mca_pml_bfo_recv_request_t* recvreq
switch(hdr->hdr_common.hdr_type) {
case MCA_PML_BFO_HDR_TYPE_MATCH:
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
OMPI_PML_BFO_MATCH_HDR_LEN,
bytes_packed );
bytes_packed = mca_pml_bfo_compute_segment_length_base (segments, num_segments,
OMPI_PML_BFO_MATCH_HDR_LEN);
break;
case MCA_PML_BFO_HDR_TYPE_RNDV:
case MCA_PML_BFO_HDR_TYPE_RGET:
bytes_packed = hdr->hdr_rndv.hdr_msg_length;
break;
}
@ -794,8 +792,7 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
while(bytes_remaining > 0 &&
recvreq->req_pipeline_depth < mca_pml_bfo.recv_pipeline_depth) {
size_t hdr_size;
size_t size;
size_t size, seg_size;
mca_pml_bfo_rdma_hdr_t* hdr;
mca_btl_base_descriptor_t* dst;
mca_btl_base_descriptor_t* ctl;
@ -856,14 +853,10 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
dst->des_cbfunc = mca_pml_bfo_put_completion;
dst->des_cbdata = recvreq;
/* prepare a descriptor for rdma control message */
hdr_size = sizeof(mca_pml_bfo_rdma_hdr_t);
if(dst->des_dst_cnt > 1) {
hdr_size += (sizeof(mca_btl_base_segment_t) *
(dst->des_dst_cnt-1));
}
seg_size = btl->btl_seg_size * dst->des_dst_cnt;
mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, hdr_size,
/* prepare a descriptor for rdma control message */
mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof(mca_pml_bfo_rdma_hdr_t) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
if( OPAL_UNLIKELY(NULL == ctl) ) {
@ -888,12 +881,8 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
hdr->hdr_seg_cnt = dst->des_dst_cnt;
for( i = 0; i < dst->des_dst_cnt; i++ ) {
hdr->hdr_segs[i].seg_addr.lval = ompi_ptr_ptol(dst->des_dst[i].seg_addr.pval);
hdr->hdr_segs[i].seg_len = dst->des_dst[i].seg_len;
hdr->hdr_segs[i].seg_key.key64[0] = dst->des_dst[i].seg_key.key64[0];
hdr->hdr_segs[i].seg_key.key64[1] = dst->des_dst[i].seg_key.key64[1];
}
/* copy segments */
memmove (hdr + 1, dst->des_dst, seg_size);
if(!recvreq->req_ack_sent)
recvreq->req_ack_sent = true;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -233,7 +234,7 @@ mca_pml_bfo_rndv_completion( mca_btl_base_module_t* btl,
{
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
size_t req_bytes_delivered = 0;
size_t req_bytes_delivered;
/* check completion status */
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
@ -256,10 +257,10 @@ mca_pml_bfo_rndv_completion( mca_btl_base_module_t* btl,
* happens in one thread, the increase of the req_bytes_delivered does not
* have to be atomic.
*/
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( des->des_src,
des->des_src_cnt,
sizeof(mca_pml_bfo_rendezvous_hdr_t),
req_bytes_delivered );
req_bytes_delivered = mca_pml_bfo_compute_segment_length (btl->btl_seg_size,
(void *) des->des_src,
des->des_src_cnt,
sizeof(mca_pml_bfo_rendezvous_hdr_t));
#if PML_BFO
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RNDV");
@ -280,14 +281,15 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
{
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
size_t req_bytes_delivered = 0;
size_t req_bytes_delivered;
#if PML_BFO
MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des);
#endif /* PML_BFO */
/* count bytes of user data actually delivered and check for request completion */
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( des->des_src, des->des_src_cnt,
0, req_bytes_delivered );
req_bytes_delivered = mca_pml_bfo_compute_segment_length (btl->btl_seg_size,
(void *) des->des_src,
des->des_src_cnt, 0);
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
send_request_pml_complete_check(sendreq);
@ -338,7 +340,7 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl,
{
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
size_t req_bytes_delivered = 0;
size_t req_bytes_delivered;
#if PML_BFO
sendreq->req_events--;
#endif /* PML_BFO */
@ -355,10 +357,10 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl,
}
/* count bytes of user data actually delivered */
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( des->des_src,
des->des_src_cnt,
sizeof(mca_pml_bfo_frag_hdr_t),
req_bytes_delivered );
req_bytes_delivered = mca_pml_bfo_compute_segment_length (btl->btl_seg_size,
(void *) des->des_src,
des->des_src_cnt,
sizeof(mca_pml_bfo_frag_hdr_t));
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, -1);
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
@ -712,7 +714,7 @@ int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq,
bml_btl = sendreq->req_rdma[0].bml_btl;
if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
size_t i;
size_t seg_size;
size_t old_position = sendreq->req_send.req_base.req_convertor.bConverted;
MEMCHECKER(
@ -746,10 +748,11 @@ int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq,
src->des_cbfunc = mca_pml_bfo_rget_completion;
src->des_cbdata = sendreq;
seg_size = bml_btl->btl->btl_seg_size * src->des_src_cnt;
/* allocate space for get hdr + segment list */
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER,
sizeof(mca_pml_bfo_rget_hdr_t) +
(sizeof(mca_btl_base_segment_t) * (src->des_src_cnt-1)),
sizeof(mca_pml_bfo_rget_hdr_t) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if( OPAL_UNLIKELY(NULL == des) ) {
opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor,
@ -778,19 +781,8 @@ int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq,
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RGET,
sendreq->req_send.req_base.req_proc);
for( i = 0; i < src->des_src_cnt; i++ ) {
hdr->hdr_rget.hdr_segs[i].seg_addr.lval = ompi_ptr_ptol(src->des_src[i].seg_addr.pval);
hdr->hdr_rget.hdr_segs[i].seg_len = src->des_src[i].seg_len;
#if OMPI_CUDA_SUPPORT_41
memcpy(hdr->hdr_rget.hdr_segs[i].seg_key.cudakey, src->des_src[i].seg_key.cudakey,
sizeof(src->des_src[i].seg_key.cudakey));
hdr->hdr_rget.hdr_segs[i].memh_seg_addr.lval = ompi_ptr_ptol(src->des_src[i].memh_seg_addr.pval);
hdr->hdr_rget.hdr_segs[i].memh_seg_len = src->des_src[i].memh_seg_len;
#else /* OMPI_CUDA_SUPPORT_41 */
hdr->hdr_rget.hdr_segs[i].seg_key.key64[0] = src->des_src[i].seg_key.key64[0];
hdr->hdr_rget.hdr_segs[i].seg_key.key64[1] = src->des_src[i].seg_key.key64[1];
#endif /* OMPI_CUDA_SUPPORT_41 */
}
/* copy segment data */
memmove (&hdr->hdr_rget + 1, src->des_src, seg_size);
des->des_cbfunc = mca_pml_bfo_send_ctl_completion;
@ -1299,8 +1291,8 @@ int mca_pml_bfo_send_request_put_frag( mca_pml_bfo_rdma_frag_t* frag )
}
return OMPI_ERR_OUT_OF_RESOURCE;
}
des->des_dst = frag->rdma_segs;
des->des_dst = (mca_btl_base_segment_t *) frag->rdma_segs;
des->des_dst_cnt = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
des->des_cbfunc = mca_pml_bfo_put_completion;
des->des_cbdata = frag;
@ -1363,21 +1355,22 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
orte_errmgr.abort(-1, NULL);
}
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
/* setup fragment */
memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
for( i = 0; i < hdr->hdr_seg_cnt; i++ ) {
frag->rdma_segs[i].seg_addr.lval = hdr->hdr_segs[i].seg_addr.lval;
frag->rdma_segs[i].seg_len = hdr->hdr_segs[i].seg_len;
frag->rdma_segs[i].seg_key.key64[0] = hdr->hdr_segs[i].seg_key.key64[0];
frag->rdma_segs[i].seg_key.key64[1] = hdr->hdr_segs[i].seg_key.key64[1];
mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *) ((uintptr_t)(frag->rdma_segs) + i * btl->btl_seg_size);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if ((sendreq->req_send.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
(ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
size += opal_swap_bytes4(frag->rdma_segs[i].seg_len);
size += opal_swap_bytes4(seg->seg_len);
} else
#endif
{
size += frag->rdma_segs[i].seg_len;
size += seg->seg_len;
}
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
* Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
@ -283,15 +284,30 @@ void mca_pml_csum_process_pending_rdma(void);
/*
* Compute the total number of bytes on supplied descriptor
*/
#define MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH(segments, count, hdrlen, length) \
do { \
size_t i; \
\
for( i = 0; i < count; i++ ) { \
length += segments[i].seg_len; \
} \
length -= hdrlen; \
} while(0)
static inline int mca_pml_csum_compute_segment_length (size_t seg_size, void *segments, size_t count,
size_t hdrlen) {
size_t i, length;
for (i = 0, length = -hdrlen ; i < count ; ++i) {
mca_btl_base_segment_t *segment =
(mca_btl_base_segment_t *)((char *) segments + i * seg_size);
length += segment->seg_len;
}
return length;
}
static inline int mca_pml_csum_compute_segment_length_base (mca_btl_base_segment_t *segments,
size_t count, size_t hdrlen) {
size_t i, length;
for (i = 0, length = -hdrlen ; i < count ; ++i) {
length += segments[i].seg_len;
}
return length;
}
/* represent BTL chosen for sending request */
struct mca_pml_csum_com_btl_t {

Просмотреть файл

@ -139,7 +139,6 @@ struct mca_pml_csum_rget_hdr_t {
uint8_t hdr_padding[4];
#endif
ompi_ptr_t hdr_des; /**< source descriptor */
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
};
typedef struct mca_pml_csum_rget_hdr_t mca_pml_csum_rget_hdr_t;

Просмотреть файл

@ -38,7 +38,7 @@ struct mca_pml_csum_rdma_frag_t {
mca_pml_csum_hdr_t rdma_hdr;
mca_pml_csum_rdma_state_t rdma_state;
size_t rdma_length;
mca_btl_base_segment_t rdma_segs[MCA_BTL_DES_MAX_SEGMENTS];
uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS];
void *rdma_req;
struct mca_bml_base_endpoint_t* rdma_ep;
opal_convertor_t convertor;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -219,8 +220,9 @@ static void mca_pml_csum_put_completion( mca_btl_base_module_t* btl,
size_t bytes_received = 0;
if( OPAL_LIKELY(status == OMPI_SUCCESS) ) {
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( des->des_dst, des->des_dst_cnt,
0, bytes_received );
bytes_received = mca_pml_csum_compute_segment_length (btl->btl_seg_size,
(void *) des->des_dst,
des->des_dst_cnt, 0);
}
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
@ -413,7 +415,7 @@ int mca_pml_csum_recv_request_get_frag( mca_pml_csum_rdma_frag_t* frag )
return OMPI_ERR_OUT_OF_RESOURCE;
}
descriptor->des_src = frag->rdma_segs;
descriptor->des_src = (mca_btl_base_segment_t *) frag->rdma_segs;
descriptor->des_src_cnt = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
descriptor->des_cbfunc = mca_pml_csum_rget_completion;
descriptor->des_cbdata = frag;
@ -454,14 +456,13 @@ void mca_pml_csum_recv_request_progress_frag( mca_pml_csum_recv_request_t* recvr
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0, data_offset = 0;
size_t bytes_received, data_offset = 0;
size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_CSUM_RECV_REQUEST_UNPACK */
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
uint32_t csum = OPAL_CSUM_ZERO;
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
bytes_received -= sizeof(mca_pml_csum_frag_hdr_t);
bytes_received = mca_pml_csum_compute_segment_length_base (segments, num_segments,
sizeof(mca_pml_csum_frag_hdr_t));
data_offset = hdr->hdr_frag.hdr_frag_offset;
/*
* Make user buffer accessable(defined) before unpacking.
@ -524,15 +525,12 @@ void mca_pml_csum_recv_request_progress_rget( mca_pml_csum_recv_request_t* recvr
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0;
mca_pml_csum_rget_hdr_t* hdr = (mca_pml_csum_rget_hdr_t*)segments->seg_addr.pval;
mca_bml_base_endpoint_t* bml_endpoint = NULL;
mca_pml_csum_rdma_frag_t* frag;
size_t i, size = 0;
int rc;
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
MCA_PML_CSUM_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match);
@ -556,17 +554,22 @@ void mca_pml_csum_recv_request_progress_rget( mca_pml_csum_recv_request_t* recvr
/* lookup bml datastructures */
bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_bml;
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
/* allocate/initialize a fragment */
memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
for(i = 0; i < hdr->hdr_seg_cnt; i++) {
frag->rdma_segs[i] = hdr->hdr_segs[i];
mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *)(frag->rdma_segs + i * btl->btl_seg_size);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if ((recvreq->req_recv.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
(ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
size += opal_swap_bytes4(hdr->hdr_segs[i].seg_len);
size += opal_swap_bytes4(seg->seg_len);
} else
#endif
{
size += hdr->hdr_segs[i].seg_len;
size += seg->seg_len;
}
}
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
@ -595,16 +598,15 @@ void mca_pml_csum_recv_request_progress_rndv( mca_pml_csum_recv_request_t* recvr
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0;
size_t bytes_received;
size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_CSUM_RECV_REQUEST_UNPACK */
size_t data_offset = 0;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
uint32_t csum = OPAL_CSUM_ZERO;
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
bytes_received -= sizeof(mca_pml_csum_rendezvous_hdr_t);
bytes_received = mca_pml_csum_compute_segment_length_base (segments, num_segments,
sizeof(mca_pml_csum_rendezvous_hdr_t));
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req;
recvreq->req_rdma_offset = bytes_received;
@ -669,14 +671,14 @@ void mca_pml_csum_recv_request_progress_match( mca_pml_csum_recv_request_t* recv
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0, data_offset = 0;
size_t bytes_received, data_offset = 0;
size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_CSUM_RECV_REQUEST_UNPACK */
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
uint32_t csum = OPAL_CSUM_ZERO;
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
bytes_received -= OMPI_PML_CSUM_MATCH_HDR_LEN;
bytes_received = mca_pml_csum_compute_segment_length_base (segments, num_segments,
OMPI_PML_CSUM_MATCH_HDR_LEN);
recvreq->req_recv.req_bytes_packed = bytes_received;
MCA_PML_CSUM_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match);
@ -746,15 +748,11 @@ void mca_pml_csum_recv_request_matched_probe( mca_pml_csum_recv_request_t* recvr
switch(hdr->hdr_common.hdr_type) {
case MCA_PML_CSUM_HDR_TYPE_MATCH:
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
OMPI_PML_CSUM_MATCH_HDR_LEN,
bytes_packed );
bytes_packed = mca_pml_csum_compute_segment_length_base (segments, num_segments,
OMPI_PML_CSUM_MATCH_HDR_LEN);
break;
case MCA_PML_CSUM_HDR_TYPE_RNDV:
case MCA_PML_CSUM_HDR_TYPE_RGET:
bytes_packed = hdr->hdr_rndv.hdr_msg_length;
break;
}
@ -796,8 +794,7 @@ int mca_pml_csum_recv_request_schedule_once( mca_pml_csum_recv_request_t* recvre
while(bytes_remaining > 0 &&
recvreq->req_pipeline_depth < mca_pml_csum.recv_pipeline_depth) {
size_t hdr_size;
size_t size;
size_t size, seg_size;
mca_pml_csum_rdma_hdr_t* hdr;
mca_btl_base_descriptor_t* dst;
mca_btl_base_descriptor_t* ctl;
@ -858,14 +855,10 @@ int mca_pml_csum_recv_request_schedule_once( mca_pml_csum_recv_request_t* recvre
dst->des_cbfunc = mca_pml_csum_put_completion;
dst->des_cbdata = recvreq;
/* prepare a descriptor for rdma control message */
hdr_size = sizeof(mca_pml_csum_rdma_hdr_t);
if(dst->des_dst_cnt > 1) {
hdr_size += (sizeof(mca_btl_base_segment_t) *
(dst->des_dst_cnt-1));
}
seg_size = btl->btl_seg_size * dst->des_dst_cnt;
mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, hdr_size,
/* prepare a descriptor for rdma control message */
mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof(mca_pml_csum_rdma_hdr_t) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
if( OPAL_UNLIKELY(NULL == ctl) ) {
@ -885,12 +878,8 @@ int mca_pml_csum_recv_request_schedule_once( mca_pml_csum_recv_request_t* recvre
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
hdr->hdr_seg_cnt = dst->des_dst_cnt;
for( i = 0; i < dst->des_dst_cnt; i++ ) {
hdr->hdr_segs[i].seg_addr.lval = ompi_ptr_ptol(dst->des_dst[i].seg_addr.pval);
hdr->hdr_segs[i].seg_len = dst->des_dst[i].seg_len;
hdr->hdr_segs[i].seg_key.key64[0] = dst->des_dst[i].seg_key.key64[0];
hdr->hdr_segs[i].seg_key.key64[1] = dst->des_dst[i].seg_key.key64[1];
}
/* copy segments */
memmove (hdr + 1, dst->des_dst, seg_size);
if(!recvreq->req_ack_sent)
recvreq->req_ack_sent = true;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -232,7 +233,7 @@ mca_pml_csum_rndv_completion( mca_btl_base_module_t* btl,
{
mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
size_t req_bytes_delivered = 0;
size_t req_bytes_delivered;
/* check completion status */
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
@ -245,10 +246,10 @@ mca_pml_csum_rndv_completion( mca_btl_base_module_t* btl,
* happens in one thread, the increase of the req_bytes_delivered does not
* have to be atomic.
*/
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( des->des_src,
des->des_src_cnt,
sizeof(mca_pml_csum_rendezvous_hdr_t),
req_bytes_delivered );
req_bytes_delivered = mca_pml_csum_compute_segment_length (btl->btl_seg_size,
(void *) des->des_src,
des->des_src_cnt,
sizeof(mca_pml_csum_rendezvous_hdr_t));
mca_pml_csum_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered );
}
@ -269,8 +270,11 @@ mca_pml_csum_rget_completion( mca_btl_base_module_t* btl,
size_t req_bytes_delivered = 0;
/* count bytes of user data actually delivered and check for request completion */
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( des->des_src, des->des_src_cnt,
0, req_bytes_delivered );
if (OPAL_LIKELY(OMPI_SUCCESS == status)) {
req_bytes_delivered = mca_pml_csum_compute_segment_length (btl->btl_seg_size,
(void *) des->des_src,
des->des_src_cnt, 0);
}
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
send_request_pml_complete_check(sendreq);
@ -309,7 +313,7 @@ mca_pml_csum_frag_completion( mca_btl_base_module_t* btl,
{
mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
size_t req_bytes_delivered = 0;
size_t req_bytes_delivered;
/* check completion status */
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
@ -319,10 +323,10 @@ mca_pml_csum_frag_completion( mca_btl_base_module_t* btl,
}
/* count bytes of user data actually delivered */
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( des->des_src,
des->des_src_cnt,
sizeof(mca_pml_csum_frag_hdr_t),
req_bytes_delivered );
req_bytes_delivered = mca_pml_csum_compute_segment_length (btl->btl_seg_size,
(void *) des->des_src,
des->des_src_cnt,
sizeof(mca_pml_csum_frag_hdr_t));
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, -1);
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
@ -684,7 +688,7 @@ int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq,
* operation is achieved.
*/
mca_btl_base_descriptor_t* des;
mca_btl_base_descriptor_t* des, *src = NULL;
mca_btl_base_segment_t* segment;
mca_pml_csum_hdr_t* hdr;
bool need_local_cb = false;
@ -694,7 +698,7 @@ int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq,
if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & MCA_BTL_FLAGS_GET)) {
mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
mca_btl_base_descriptor_t* src;
size_t i;
size_t seg_size;
size_t old_position = sendreq->req_send.req_base.req_convertor.bConverted;
MEMCHECKER(
@ -728,10 +732,11 @@ int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq,
src->des_cbfunc = mca_pml_csum_rget_completion;
src->des_cbdata = sendreq;
seg_size = bml_btl->btl->btl_seg_size * src->des_src_cnt;
/* allocate space for get hdr + segment list */
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER,
sizeof(mca_pml_csum_rget_hdr_t) +
(sizeof(mca_btl_base_segment_t) * (src->des_src_cnt-1)),
sizeof(mca_pml_csum_rget_hdr_t) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if( OPAL_UNLIKELY(NULL == des) ) {
opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor,
@ -757,12 +762,8 @@ int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq,
csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_RGET,
sendreq->req_send.req_base.req_proc);
for( i = 0; i < src->des_src_cnt; i++ ) {
hdr->hdr_rget.hdr_segs[i].seg_addr.lval = ompi_ptr_ptol(src->des_src[i].seg_addr.pval);
hdr->hdr_rget.hdr_segs[i].seg_len = src->des_src[i].seg_len;
hdr->hdr_rget.hdr_segs[i].seg_key.key64[0] = src->des_src[i].seg_key.key64[0];
hdr->hdr_rget.hdr_segs[i].seg_key.key64[1] = src->des_src[i].seg_key.key64[1];
}
/* copy segment data */
memmove (&hdr->hdr_rget + 1, src->des_src, seg_size);
des->des_cbfunc = mca_pml_csum_send_ctl_completion;
@ -834,6 +835,10 @@ int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq,
return OMPI_SUCCESS;
}
mca_bml_base_free(bml_btl, des);
if (src) {
mca_bml_base_free (bml_btl, src);
}
return rc;
}
@ -1244,7 +1249,7 @@ int mca_pml_csum_send_request_put_frag( mca_pml_csum_rdma_frag_t* frag )
return OMPI_ERR_OUT_OF_RESOURCE;
}
des->des_dst = frag->rdma_segs;
des->des_dst = (mca_btl_base_segment_t *) frag->rdma_segs;
des->des_dst_cnt = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
des->des_cbfunc = mca_pml_csum_put_completion;
des->des_cbdata = frag;
@ -1298,21 +1303,22 @@ void mca_pml_csum_send_request_put( mca_pml_csum_send_request_t* sendreq,
orte_errmgr.abort(-1, NULL);
}
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
/* setup fragment */
memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
for( i = 0; i < hdr->hdr_seg_cnt; i++ ) {
frag->rdma_segs[i].seg_addr.lval = hdr->hdr_segs[i].seg_addr.lval;
frag->rdma_segs[i].seg_len = hdr->hdr_segs[i].seg_len;
frag->rdma_segs[i].seg_key.key64[0] = hdr->hdr_segs[i].seg_key.key64[0];
frag->rdma_segs[i].seg_key.key64[1] = hdr->hdr_segs[i].seg_key.key64[1];
mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *) ((uintptr_t)(frag->rdma_segs) + i * btl->btl_seg_size);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if ((sendreq->req_send.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
(ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
size += opal_swap_bytes4(frag->rdma_segs[i].seg_len);
size += opal_swap_bytes4(seg->seg_len);
} else
#endif
{
size += frag->rdma_segs[i].seg_len;
size += seg->seg_len;
}
}

Просмотреть файл

@ -284,15 +284,30 @@ void mca_pml_ob1_process_pending_rdma(void);
/*
* Compute the total number of bytes on supplied descriptor
*/
#define MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH(segments, count, hdrlen, length) \
do { \
size_t i; \
\
for( i = 0; i < count; i++ ) { \
length += segments[i].seg_len; \
} \
length -= hdrlen; \
} while(0)
static inline int mca_pml_ob1_compute_segment_length (size_t seg_size, void *segments, size_t count,
size_t hdrlen) {
size_t i, length;
for (i = 0, length = -hdrlen ; i < count ; ++i) {
mca_btl_base_segment_t *segment =
(mca_btl_base_segment_t *)((char *) segments + i * seg_size);
length += segment->seg_len;
}
return length;
}
static inline int mca_pml_ob1_compute_segment_length_base (mca_btl_base_segment_t *segments,
size_t count, size_t hdrlen) {
size_t i, length;
for (i = 0, length = -hdrlen ; i < count ; ++i) {
length += segments[i].seg_len;
}
return length;
}
/* represent BTL chosen for sending request */
struct mca_pml_ob1_com_btl_t {

Просмотреть файл

@ -162,7 +162,6 @@ struct mca_pml_ob1_rget_hdr_t {
uint8_t hdr_padding[4];
#endif
ompi_ptr_t hdr_des; /**< source descriptor */
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
};
typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -38,7 +39,7 @@ struct mca_pml_ob1_rdma_frag_t {
mca_pml_ob1_hdr_t rdma_hdr;
mca_pml_ob1_rdma_state_t rdma_state;
size_t rdma_length;
mca_btl_base_segment_t rdma_segs[MCA_BTL_DES_MAX_SEGMENTS];
uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS];
void *rdma_req;
struct mca_bml_base_endpoint_t* rdma_ep;
opal_convertor_t convertor;

Просмотреть файл

@ -193,8 +193,9 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
size_t bytes_received = 0;
if( OPAL_LIKELY(status == OMPI_SUCCESS) ) {
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( des->des_dst, des->des_dst_cnt,
0, bytes_received );
bytes_received = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
(void *) des->des_dst,
des->des_dst_cnt, 0);
}
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
@ -359,18 +360,13 @@ static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag,
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
mca_btl_base_descriptor_t *ctl;
mca_pml_ob1_rdma_hdr_t *hdr;
size_t hdr_size;
unsigned int i;
size_t seg_size;
int rc;
/* prepare a descriptor for rdma control message */
hdr_size = sizeof (mca_pml_ob1_rdma_hdr_t);
if (dst->des_dst_cnt > 1) {
hdr_size += (sizeof (mca_btl_base_segment_t) *
(dst->des_dst_cnt-1));
}
seg_size = bml_btl->btl->btl_seg_size * dst->des_dst_cnt;
mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, hdr_size,
/* prepare a descriptor for rdma control message */
mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof (mca_pml_ob1_rdma_hdr_t) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
if (OPAL_UNLIKELY(NULL == ctl)) {
@ -391,12 +387,8 @@ static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag,
hdr->hdr_seg_cnt = dst->des_dst_cnt;
for (i = 0 ; i < dst->des_dst_cnt ; ++i) {
hdr->hdr_segs[i].seg_addr.lval = ompi_ptr_ptol(dst->des_dst[i].seg_addr.pval);
hdr->hdr_segs[i].seg_len = dst->des_dst[i].seg_len;
hdr->hdr_segs[i].seg_key.key64[0] = dst->des_dst[i].seg_key.key64[0];
hdr->hdr_segs[i].seg_key.key64[1] = dst->des_dst[i].seg_key.key64[1];
}
/* copy segments */
memcpy (hdr + 1, dst->des_dst, seg_size);
dst->des_cbfunc = mca_pml_ob1_put_completion;
dst->des_cbdata = recvreq;
@ -454,7 +446,7 @@ int mca_pml_ob1_recv_request_get_frag( mca_pml_ob1_rdma_frag_t* frag )
}
}
descriptor->des_src = frag->rdma_segs;
descriptor->des_src = (mca_btl_base_segment_t *) frag->rdma_segs;
descriptor->des_src_cnt = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
descriptor->des_cbfunc = mca_pml_ob1_rget_completion;
descriptor->des_cbdata = frag;
@ -500,13 +492,12 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0, data_offset = 0;
size_t bytes_received, data_offset = 0;
size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_OB1_RECV_REQUEST_UNPACK */
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
bytes_received -= sizeof(mca_pml_ob1_frag_hdr_t);
bytes_received = mca_pml_ob1_compute_segment_length_base (segments, num_segments,
sizeof(mca_pml_ob1_frag_hdr_t));
data_offset = hdr->hdr_frag.hdr_frag_offset;
/*
* Make user buffer accessable(defined) before unpacking.
@ -553,15 +544,12 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0;
mca_pml_ob1_rget_hdr_t* hdr = (mca_pml_ob1_rget_hdr_t*)segments->seg_addr.pval;
mca_bml_base_endpoint_t* bml_endpoint = NULL;
mca_pml_ob1_rdma_frag_t* frag;
size_t i, size = 0;
int rc;
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match);
@ -592,17 +580,22 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
/* lookup bml datastructures */
bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_bml;
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
/* allocate/initialize a fragment */
memcpy (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
for(i = 0; i < hdr->hdr_seg_cnt; i++) {
frag->rdma_segs[i] = hdr->hdr_segs[i];
mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *)(frag->rdma_segs + i * btl->btl_seg_size);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if ((recvreq->req_recv.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
(ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
size += opal_swap_bytes4(hdr->hdr_segs[i].seg_len);
size += opal_swap_bytes4(seg->seg_len);
} else
#endif
{
size += hdr->hdr_segs[i].seg_len;
size += seg->seg_len;
}
}
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
@ -657,10 +650,9 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq
size_t data_offset = 0;
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
bytes_received -= sizeof(mca_pml_ob1_rendezvous_hdr_t);
bytes_received = mca_pml_ob1_compute_segment_length_base (segments, num_segments,
sizeof(mca_pml_ob1_rendezvous_hdr_t));
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req;
recvreq->req_rdma_offset = bytes_received;
@ -710,13 +702,13 @@ void mca_pml_ob1_recv_request_progress_match( mca_pml_ob1_recv_request_t* recvre
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0, data_offset = 0;
size_t bytes_received, data_offset = 0;
size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_OB1_RECV_REQUEST_UNPACK */
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received );
bytes_received -= OMPI_PML_OB1_MATCH_HDR_LEN;
bytes_received = mca_pml_ob1_compute_segment_length_base (segments, num_segments,
OMPI_PML_OB1_MATCH_HDR_LEN);
recvreq->req_recv.req_bytes_packed = bytes_received;
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match);
@ -769,12 +761,9 @@ void mca_pml_ob1_recv_request_matched_probe( mca_pml_ob1_recv_request_t* recvreq
switch(hdr->hdr_common.hdr_type) {
case MCA_PML_OB1_HDR_TYPE_MATCH:
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
OMPI_PML_OB1_MATCH_HDR_LEN,
bytes_packed );
bytes_packed = mca_pml_ob1_compute_segment_length_base (segments, num_segments,
OMPI_PML_OB1_MATCH_HDR_LEN);
break;
case MCA_PML_OB1_HDR_TYPE_RNDV:
case MCA_PML_OB1_HDR_TYPE_RGET:
@ -820,8 +809,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
while(bytes_remaining > 0 &&
recvreq->req_pipeline_depth < mca_pml_ob1.recv_pipeline_depth) {
size_t hdr_size;
size_t size;
size_t size, seg_size;
mca_pml_ob1_rdma_hdr_t* hdr;
mca_btl_base_descriptor_t* dst;
mca_btl_base_descriptor_t* ctl;
@ -882,14 +870,10 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
dst->des_cbfunc = mca_pml_ob1_put_completion;
dst->des_cbdata = recvreq;
/* prepare a descriptor for rdma control message */
hdr_size = sizeof(mca_pml_ob1_rdma_hdr_t);
if(dst->des_dst_cnt > 1) {
hdr_size += (sizeof(mca_btl_base_segment_t) *
(dst->des_dst_cnt-1));
}
seg_size = btl->btl_seg_size * dst->des_dst_cnt;
mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, hdr_size,
/* prepare a descriptor for rdma control message */
mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rdma_hdr_t) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
if( OPAL_UNLIKELY(NULL == ctl) ) {
@ -909,12 +893,8 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
hdr->hdr_seg_cnt = dst->des_dst_cnt;
for( i = 0; i < dst->des_dst_cnt; i++ ) {
hdr->hdr_segs[i].seg_addr.lval = ompi_ptr_ptol(dst->des_dst[i].seg_addr.pval);
hdr->hdr_segs[i].seg_len = dst->des_dst[i].seg_len;
hdr->hdr_segs[i].seg_key.key64[0] = dst->des_dst[i].seg_key.key64[0];
hdr->hdr_segs[i].seg_key.key64[1] = dst->des_dst[i].seg_key.key64[1];
}
/* copy segments */
memmove (hdr + 1, dst->des_dst, seg_size);
if(!recvreq->req_ack_sent)
recvreq->req_ack_sent = true;

Просмотреть файл

@ -225,7 +225,7 @@ mca_pml_ob1_rndv_completion( mca_btl_base_module_t* btl,
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
size_t req_bytes_delivered = 0;
size_t req_bytes_delivered;
/* check completion status */
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
@ -238,10 +238,10 @@ mca_pml_ob1_rndv_completion( mca_btl_base_module_t* btl,
* happens in one thread, the increase of the req_bytes_delivered does not
* have to be atomic.
*/
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( des->des_src,
des->des_src_cnt,
sizeof(mca_pml_ob1_rendezvous_hdr_t),
req_bytes_delivered );
req_bytes_delivered = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
(void *) des->des_src,
des->des_src_cnt,
sizeof(mca_pml_ob1_rendezvous_hdr_t));
mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered );
}
@ -259,12 +259,15 @@ mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl,
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
size_t req_bytes_delivered = 0;
size_t req_bytes_delivered;
/* count bytes of user data actually delivered and check for request completion */
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( des->des_src, des->des_src_cnt,
0, req_bytes_delivered );
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
if (OPAL_LIKELY(OMPI_SUCCESS == status)) {
req_bytes_delivered = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
(void *) des->des_src,
des->des_src_cnt, 0);
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
}
sendreq->src_des = NULL;
send_request_pml_complete_check(sendreq);
@ -303,7 +306,7 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
size_t req_bytes_delivered = 0;
size_t req_bytes_delivered;
/* check completion status */
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
@ -313,10 +316,10 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
}
/* count bytes of user data actually delivered */
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( des->des_src,
des->des_src_cnt,
sizeof(mca_pml_ob1_frag_hdr_t),
req_bytes_delivered );
req_bytes_delivered = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
(void *) des->des_src,
des->des_src_cnt,
sizeof(mca_pml_ob1_frag_hdr_t));
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, -1);
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
@ -641,162 +644,104 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq,
*/
mca_btl_base_descriptor_t *des, *src = NULL;
mca_btl_base_segment_t* segment;
mca_pml_ob1_hdr_t* hdr;
bool need_local_cb = false;
mca_pml_ob1_rget_hdr_t *hdr;
size_t seg_size;
int rc;
sendreq->src_des = NULL;
bml_btl = sendreq->req_rdma[0].bml_btl;
if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
size_t i;
size_t old_position = sendreq->req_send.req_base.req_convertor.bConverted;
MEMCHECKER(
memchecker_call(&opal_memchecker_base_mem_defined,
sendreq->req_send.req_base.req_addr,
sendreq->req_send.req_base.req_count,
sendreq->req_send.req_base.req_datatype);
);
/* prepare source descriptor/segment(s) */
/* PML owns this descriptor and will free it in */
/* get_completion */
mca_bml_base_prepare_src( bml_btl,
reg,
&sendreq->req_send.req_base.req_convertor,
MCA_BTL_NO_ORDER, 0, &size,
MCA_BTL_DES_FLAGS_GET | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP,
&src );
MEMCHECKER(
memchecker_call(&opal_memchecker_base_mem_noaccess,
sendreq->req_send.req_base.req_addr,
sendreq->req_send.req_base.req_count,
sendreq->req_send.req_base.req_datatype);
);
if( OPAL_UNLIKELY(NULL == src) ) {
opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor,
&old_position);
return OMPI_ERR_OUT_OF_RESOURCE;
}
src->des_cbfunc = mca_pml_ob1_rget_completion;
src->des_cbdata = sendreq;
sendreq->src_des = src;
/* allocate space for get hdr + segment list */
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER,
sizeof(mca_pml_ob1_rget_hdr_t) +
(sizeof(mca_btl_base_segment_t) * (src->des_src_cnt-1)),
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if( OPAL_UNLIKELY(NULL == des) ) {
opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor,
&old_position );
mca_bml_base_free(bml_btl, src);
return OMPI_ERR_OUT_OF_RESOURCE;
}
segment = des->des_src;
/* build match header */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
hdr->hdr_rget.hdr_des.pval = src;
hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt;
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RGET,
sendreq->req_send.req_base.req_proc);
for( i = 0; i < src->des_src_cnt; i++ ) {
hdr->hdr_rget.hdr_segs[i].seg_addr.lval = ompi_ptr_ptol(src->des_src[i].seg_addr.pval);
hdr->hdr_rget.hdr_segs[i].seg_len = src->des_src[i].seg_len;
#if OMPI_CUDA_SUPPORT_41
memcpy(hdr->hdr_rget.hdr_segs[i].seg_key.cudakey, src->des_src[i].seg_key.cudakey,
sizeof(src->des_src[i].seg_key.cudakey));
hdr->hdr_rget.hdr_segs[i].memh_seg_addr.lval = ompi_ptr_ptol(src->des_src[i].memh_seg_addr.pval);
hdr->hdr_rget.hdr_segs[i].memh_seg_len = src->des_src[i].memh_seg_len;
#else /* OMPI_CUDA_SUPPORT_41 */
hdr->hdr_rget.hdr_segs[i].seg_key.key64[0] = src->des_src[i].seg_key.key64[0];
hdr->hdr_rget.hdr_segs[i].seg_key.key64[1] = src->des_src[i].seg_key.key64[1];
#endif /* OMPI_CUDA_SUPPORT_41 */
}
des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
/**
* Well, it's a get so we will not know when the peer get the data anyway.
* If we generate the PERUSE event here, at least we will know when do we
* sent the GET message ...
*/
if( sendreq->req_send.req_bytes_packed > 0 ) {
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
&(sendreq->req_send.req_base), PERUSE_SEND );
}
} else {
/* allocate a rendezvous header - dont eager send any data
* receiver will schedule rdma put(s) of the entire message
*/
mca_bml_base_alloc(bml_btl, &des,
MCA_BTL_NO_ORDER,
sizeof(mca_pml_ob1_rendezvous_hdr_t),
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if( OPAL_UNLIKELY(NULL == des)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
segment = des->des_src;
/* build hdr */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV,
sendreq->req_send.req_base.req_proc);
/* update lengths with number of bytes actually packed */
segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t);
/* first fragment of a long message */
des->des_cbfunc = mca_pml_ob1_rndv_completion;
need_local_cb = true;
/* wait for ack and completion */
sendreq->req_state = 2;
if (!(bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
/* This BTL does not support get. Use rendezvous to start the RDMA operation using put instead. */
return mca_pml_ob1_send_request_start_rndv (sendreq, bml_btl, 0, MCA_PML_OB1_HDR_FLAGS_CONTIG |
MCA_PML_OB1_HDR_FLAGS_PIN);
}
MEMCHECKER(
memchecker_call(&opal_memchecker_base_mem_defined,
sendreq->req_send.req_base.req_addr,
sendreq->req_send.req_base.req_count,
sendreq->req_send.req_base.req_datatype);
);
/* prepare source descriptor/segment(s) */
/* PML owns this descriptor and will free it in */
/* mca_pml_ob1_rget_completion */
mca_bml_base_prepare_src( bml_btl, sendreq->req_rdma[0].btl_reg,
&sendreq->req_send.req_base.req_convertor,
MCA_BTL_NO_ORDER, 0, &size, MCA_BTL_DES_FLAGS_GET |
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, &src );
MEMCHECKER(
memchecker_call(&opal_memchecker_base_mem_noaccess,
sendreq->req_send.req_base.req_addr,
sendreq->req_send.req_base.req_count,
sendreq->req_send.req_base.req_datatype);
);
if( OPAL_UNLIKELY(NULL == src) ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
src->des_cbfunc = mca_pml_ob1_rget_completion;
src->des_cbdata = sendreq;
sendreq->src_des = src;
seg_size = bml_btl->btl->btl_seg_size * src->des_src_cnt;
/* allocate space for get hdr + segment list */
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof (*hdr) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if( OPAL_UNLIKELY(NULL == des) ) {
/* NTH: no need to reset the converter here. it will be reset before it is retried */
mca_bml_base_free(bml_btl, src);
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* build match header */
hdr = (mca_pml_ob1_rget_hdr_t *) des->des_src->seg_addr.pval;
hdr->hdr_rndv.hdr_match.hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
hdr->hdr_rndv.hdr_match.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
hdr->hdr_rndv.hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_rndv.hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_rndv.hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_rndv.hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
hdr->hdr_des.pval = src;
hdr->hdr_seg_cnt = src->des_src_cnt;
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RGET, sendreq->req_send.req_base.req_proc);
/* copy segment data */
memcpy (hdr + 1, src->des_src, seg_size);
des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
des->des_cbdata = sendreq;
/* send */
rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type);
if( OPAL_LIKELY( rc >= 0 ) ) {
if( OPAL_LIKELY( 1 == rc ) && (true == need_local_cb)) {
mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, 0 );
}
return OMPI_SUCCESS;
}
mca_bml_base_free(bml_btl, des);
if (sendreq->src_des) {
mca_bml_base_free (bml_btl, sendreq->src_des);
sendreq->src_des = NULL;
/**
* Well, it's a get so we will not know when the peer will get the data anyway.
* If we generate the PERUSE event here, at least we will know when we
* sent the GET message ...
*/
if( sendreq->req_send.req_bytes_packed > 0 ) {
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
&(sendreq->req_send.req_base), PERUSE_SEND );
}
return rc;
/* send */
rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RGET);
if (OPAL_UNLIKELY(rc < 0)) {
mca_bml_base_free(bml_btl, des);
if (sendreq->src_des) {
mca_bml_base_free (bml_btl, sendreq->src_des);
sendreq->src_des = NULL;
}
return rc;
}
return OMPI_SUCCESS;
}
@ -1198,7 +1143,7 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t *frag )
sendreq->src_des = NULL;
}
des->des_dst = frag->rdma_segs;
des->des_dst = (mca_btl_base_segment_t *) frag->rdma_segs;
des->des_dst_cnt = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
des->des_cbfunc = mca_pml_ob1_put_completion;
des->des_cbdata = frag;
@ -1255,21 +1200,22 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
orte_errmgr.abort(-1, NULL);
}
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
/* setup fragment */
memcpy (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
for( i = 0; i < hdr->hdr_seg_cnt; i++ ) {
frag->rdma_segs[i].seg_addr.lval = hdr->hdr_segs[i].seg_addr.lval;
frag->rdma_segs[i].seg_len = hdr->hdr_segs[i].seg_len;
frag->rdma_segs[i].seg_key.key64[0] = hdr->hdr_segs[i].seg_key.key64[0];
frag->rdma_segs[i].seg_key.key64[1] = hdr->hdr_segs[i].seg_key.key64[1];
mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *) ((uintptr_t)(frag->rdma_segs) + i * btl->btl_seg_size);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if ((sendreq->req_send.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
(ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
size += opal_swap_bytes4(frag->rdma_segs[i].seg_len);
size += opal_swap_bytes4(seg->seg_len);
} else
#endif
{
size += frag->rdma_segs[i].seg_len;
size += seg->seg_len;
}
}