1
1

btl-portals4: implement the BTL 3.0 interface

Этот коммит содержится в:
Todd Kordenbrock 2015-03-12 11:57:48 -05:00
родитель 714d9aa67e
Коммит d1656347c8
6 изменённых файлов: 135 добавлений и 75 удалений

Просмотреть файл

@ -39,6 +39,17 @@
#include "btl_portals4.h"
#include "btl_portals4_recv.h"
/*
 * Forward declaration of the btl_register_mem entry point installed in
 * mca_btl_portals4_module below.
 *
 * Returns a newly allocated registration handle, or NULL when the handle
 * cannot be allocated.  The handle is released again with
 * mca_btl_portals4_deregister_mem().
 *
 * NOTE(review): base/size presumably describe the memory region being
 * registered and flags the requested access -- confirm against the BTL
 * interface documentation.
 */
mca_btl_base_registration_handle_t *
mca_btl_portals4_register_mem(mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
void *base,
size_t size,
uint32_t flags);
/*
 * Forward declaration of the btl_deregister_mem entry point installed in
 * mca_btl_portals4_module below.  Releases a handle previously returned by
 * mca_btl_portals4_register_mem() and returns an OPAL status code.
 */
int mca_btl_portals4_deregister_mem(mca_btl_base_module_t *btl,
mca_btl_base_registration_handle_t *handle);
mca_btl_portals4_module_t mca_btl_portals4_module = {
.super = {
.btl_component = &mca_btl_portals4_component.super,
@ -52,7 +63,8 @@ mca_btl_portals4_module_t mca_btl_portals4_module = {
.btl_alloc = mca_btl_portals4_alloc,
.btl_free = mca_btl_portals4_free,
.btl_prepare_src = mca_btl_portals4_prepare_src,
.btl_prepare_dst = mca_btl_portals4_prepare_dst,
.btl_register_mem = mca_btl_portals4_register_mem,
.btl_deregister_mem = mca_btl_portals4_deregister_mem,
.btl_send = mca_btl_portals4_send,
.btl_get = mca_btl_portals4_get,
.btl_dump = mca_btl_base_dump,
@ -222,7 +234,7 @@ mca_btl_portals4_alloc(struct mca_btl_base_module_t* btl_base,
}
frag->md_h = PTL_INVALID_HANDLE;
frag->base.des_local_count = 1;
frag->base.des_segment_count = 1;
frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.order = MCA_BTL_NO_ORDER;
@ -274,7 +286,6 @@ mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
@ -312,7 +323,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
}
frag->segments[0].base.seg_len = max_data + reserve;
frag->base.des_local_count = 1;
frag->base.des_segment_count = 1;
} else {
/* no need to pack - rdma operation out of user's buffer */
@ -347,7 +358,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
frag->segments[0].base.seg_len = max_data;
frag->segments[0].base.seg_addr.pval = iov.iov_base;
frag->segments[0].key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
frag->base.des_local_count = 1;
frag->base.des_segment_count = 1;
/* either a put or get. figure out which later */
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
@ -398,58 +409,50 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
(void *)frag, frag->me_h, me.start, me.length,
me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits));
}
frag->base.des_local = &frag->segments[0].base;
frag->base.des_remote = NULL;
frag->base.des_remote_count = 0;
frag->base.des_segments = &frag->segments[0].base;
frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.order = MCA_BTL_NO_ORDER;
return &frag->base;
}
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_dst(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags)
mca_btl_base_registration_handle_t *
mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
mca_btl_base_endpoint_t *endpoint,
void *base,
size_t size,
uint32_t flags)
{
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
mca_btl_portals4_frag_t* frag;
struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
mca_btl_base_registration_handle_t *handle = NULL;
/* reserve space in the event queue for rdma operations immediately */
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
portals4_btl->portals_max_outstanding_ops) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (2)\n"));
mca_btl_portals4_component_progress();
}
OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
if (NULL == frag) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
handle = (mca_btl_base_registration_handle_t *)malloc(sizeof(mca_btl_base_registration_handle_t));
if (!handle) {
return NULL;
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"mca_btl_portals4_prepare_dst: Incrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
frag->segments[0].base.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segments[0].base.seg_addr.pval) );
frag->segments[0].key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
frag->base.des_remote = NULL;
frag->base.des_remote_count = 0;
frag->base.des_local = &frag->segments[0].base;
frag->base.des_local_count = 1;
frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.order = MCA_BTL_NO_ORDER;
frag->md_h = PTL_INVALID_HANDLE;
handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"mca_btl_portals4_prepare_dst &base=%p reserve=%ld size=%ld rank=%x pid=%x key=%ld\n",
(void *)&frag->base, reserve, *size, peer->ptl_proc.rank, peer->ptl_proc.phys.pid, frag->segments[0].key));
return &frag->base;
"mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld\n",
portals4_btl->interface_num, base, size, (void *)handle, handle->key));
return handle;
}
/**
 * Release a registration handle created by mca_btl_portals4_register_mem().
 *
 * @param btl_base  BTL module (a mca_btl_portals4_module_t); only read for
 *                  the interface number printed in the verbose log line.
 * @param handle    Handle to release; it is handed to free().  May be NULL,
 *                  in which case nothing is logged or freed.
 *
 * @return OPAL_SUCCESS in all cases.
 */
int
mca_btl_portals4_deregister_mem(mca_btl_base_module_t *btl_base,
                                mca_btl_base_registration_handle_t *handle)
{
    struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;

    /* The log statement below reads handle->key, so a NULL handle has to be
     * filtered out before it; free(NULL) would have been a no-op anyway, so
     * reporting success keeps the non-logging behavior unchanged. */
    if (NULL == handle) {
        return OPAL_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld\n",
                         portals4_btl->interface_num, (void *)handle, handle->key));

    free(handle);

    return OPAL_SUCCESS;
}
int

Просмотреть файл

@ -238,23 +238,12 @@ int mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_dst(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor,
@ -279,10 +268,24 @@ int mca_btl_portals4_put(struct mca_btl_base_module_t* btl_base,
int mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor);
void *local_address,
uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle,
size_t size,
int flags,
int order,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext,
void *cbdata);
/*
 * NOTE(review): presumably translates a Portals4 return code (ptl_error)
 * into the corresponding OPAL error code -- the definition is not part of
 * this header, confirm against the implementation.
 */
int mca_btl_portals4_get_error(int ptl_error);
/**
 * Portals4 definition of the BTL registration handle.
 *
 * The component advertises sizeof() of this structure as its
 * btl_registration_handle_size.  mca_btl_portals4_register_mem() fills in
 * the key from the module's portals_rdma_key counter, and
 * mca_btl_portals4_get() uses the remote handle's key as the match bits of
 * the get operation.
 */
struct mca_btl_base_registration_handle_t {
/** Portals4 match bits */
ptl_match_bits_t key;
};
/*
* global structures
*/

Просмотреть файл

@ -222,7 +222,17 @@ mca_btl_portals4_component_open(void)
mca_btl_portals4_module.super.btl_flags =
MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_RDMA_MATCHED;
mca_btl_portals4_module.super.btl_seg_size = sizeof (mca_btl_portals4_segment_t);
mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX;
mca_btl_portals4_module.super.btl_put_limit = 0; /* not implemented */
mca_btl_portals4_module.super.btl_get_alignment = 0;
mca_btl_portals4_module.super.btl_put_alignment = 0;
mca_btl_portals4_module.super.btl_get_local_registration_threshold = 0;
mca_btl_portals4_module.super.btl_put_local_registration_threshold = 0;
mca_btl_portals4_module.super.btl_bandwidth = 1000;
mca_btl_portals4_module.super.btl_latency = 0;
@ -770,8 +780,8 @@ mca_btl_portals4_component_progress(void)
tag = (unsigned char) (ev.hdr_data);
btl_base_descriptor.des_local = seg;
btl_base_descriptor.des_local_count = 1;
btl_base_descriptor.des_segments = seg;
btl_base_descriptor.des_segment_count = 1;
seg[0].seg_addr.pval = ev.start;
seg[0].seg_len = ev.mlength;
@ -785,6 +795,8 @@ mca_btl_portals4_component_progress(void)
case PTL_EVENT_PUT_OVERFLOW:
/* */
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_OVERFLOW received\n"));
goto done;
break;
@ -810,8 +822,10 @@ mca_btl_portals4_component_progress(void)
goto done;
break;
case PTL_EVENT_GET:
case PTL_EVENT_GET: /* Generated on source (target) when a get from memory ends */
/* */
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_GET received at target rlength=%ld mlength=%ld\n", ev.rlength, ev.mlength));
goto done;
break;
@ -849,11 +863,14 @@ mca_btl_portals4_component_progress(void)
}
else {
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_REPLY: Call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
frag->base.des_cbfunc(&portals4_btl->super,
frag->endpoint,
&frag->base,
OPAL_SUCCESS);
"PTL_EVENT_REPLY: Call to rdma_cbfunc=%p\n", (void *)frag->rdma_cb.func));
frag->rdma_cb.func(&portals4_btl->super,
frag->endpoint,
ev.start,
frag->rdma_cb.local_handle,
frag->rdma_cb.context,
frag->rdma_cb.data,
OPAL_SUCCESS);
PtlMDRelease(frag->md_h);
frag->md_h = PTL_INVALID_HANDLE;

Просмотреть файл

@ -27,8 +27,8 @@ static void
mca_btl_portals4_frag_common_send_constructor(mca_btl_portals4_frag_t* frag)
{
frag->base.des_flags = 0;
frag->base.des_local = &frag->segments[0].base;
frag->base.des_local_count = 2;
frag->base.des_segments = &frag->segments[0].base;
frag->base.des_segment_count = 2;
frag->segments[0].base.seg_addr.pval = frag + 1;
frag->segments[0].base.seg_len = frag->size;

Просмотреть файл

@ -57,6 +57,14 @@ struct mca_btl_portals4_frag_t {
/* length for retransmit case */
ptl_process_t peer_proc;
/* the callback and context to complete an RDMA operation */
struct {
mca_btl_base_rdma_completion_fn_t func;
void *context;
void *data;
mca_btl_base_registration_handle_t *local_handle;
} rdma_cb;
enum { BTL_PORTALS4_FRAG_TYPE_EAGER,
BTL_PORTALS4_FRAG_TYPE_MAX,
BTL_PORTALS4_FRAG_TYPE_USER } type;

Просмотреть файл

@ -37,23 +37,52 @@ mca_btl_portals4_put(struct mca_btl_base_module_t* btl_base,
int
mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor)
void *local_address,
uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle,
size_t size,
int flags,
int order,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext,
void *cbdata)
{
mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
mca_btl_portals4_segment_t *src_seg = (mca_btl_portals4_segment_t *) descriptor->des_remote;
mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor;
mca_btl_portals4_frag_t *frag = NULL;
ptl_md_t md;
int ret;
/* reserve space in the event queue for rdma operations immediately */
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
portals4_btl->portals_max_outstanding_ops) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
mca_btl_portals4_component_progress();
}
OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
if (NULL == frag){
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
return OPAL_ERROR;
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"mca_btl_portals4_get frag=%p src_seg=%p frag->md_h=%d\n", (void *)frag, (void *)src_seg, frag->md_h));
"mca_btl_portals4_prepare_src: Incrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"mca_btl_portals4_get frag=%p\n", (void *)frag));
frag->rdma_cb.func = cbfunc;
frag->rdma_cb.context = cbcontext;
frag->rdma_cb.data = cbdata;
frag->rdma_cb.local_handle = local_handle;
frag->endpoint = btl_peer;
frag->hdr.tag = MCA_BTL_TAG_MAX;
/* Bind the memory */
md.start = (void *)frag->segments[0].base.seg_addr.pval;
md.length = frag->segments[0].base.seg_len;
md.start = (void *)local_address;
md.length = size;
md.options = 0;
md.eq_handle = portals4_btl->recv_eq_h;
md.ct_handle = PTL_CT_NONE;
@ -69,7 +98,7 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
return OPAL_ERROR;
}
frag->match_bits = src_seg->key;
frag->match_bits = remote_handle->key;
frag->length = md.length;
frag->peer_proc = btl_peer->ptl_proc;
ret = PtlGet(frag->md_h,