* Add multi-device support to the Portals 4 btl.
* Remove use of the Portals 4 proc tag for the btl, as it's causing more problems than it's worth. This commit was SVN r30191.
Этот коммит содержится в:
родитель
9fcb46d85a
Коммит
013e0ec771
@ -80,6 +80,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
|
||||
struct mca_btl_base_endpoint_t** btl_peer_data,
|
||||
opal_bitmap_t* reachable)
|
||||
{
|
||||
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
|
||||
int ret;
|
||||
struct ompi_proc_t *curr_proc = NULL;
|
||||
ptl_process_t *id;
|
||||
@ -87,11 +88,10 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
|
||||
bool need_activate = false;
|
||||
|
||||
opal_output_verbose(50, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_add_procs: Adding %d procs (%d)", (int) nprocs,
|
||||
(int) mca_btl_portals4_module.portals_num_procs);
|
||||
"mca_btl_portals4_add_procs: Adding %d procs (%d) for NI %d", (int) nprocs,
|
||||
(int) portals4_btl->portals_num_procs, portals4_btl->interface_num);
|
||||
|
||||
assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl_base);
|
||||
if (0 == mca_btl_portals4_module.portals_num_procs) {
|
||||
if (0 == portals4_btl->portals_num_procs) {
|
||||
need_activate = true;
|
||||
}
|
||||
|
||||
@ -107,42 +107,44 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
|
||||
curr_proc, (void**) &id, &size);
|
||||
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output(0, "ompi_modex_recv failed: %d", ret);
|
||||
opal_output_verbose(0, ompi_btl_base_framework.framework_output,
|
||||
"btl/portals4: ompi_modex_recv failed: %d", ret);
|
||||
return ret;
|
||||
} else if (sizeof(ptl_process_t) != size) {
|
||||
opal_output(0, "ompi_modex_recv returned size %d, expected %d",
|
||||
(int) size, (int) sizeof(ptl_process_t));
|
||||
}
|
||||
if (size < sizeof(ptl_process_t)) { /* no available connection */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
|
||||
btl_peer_data[i] = malloc(sizeof(mca_btl_base_endpoint_t));
|
||||
if (NULL == btl_peer_data[i]) return OMPI_ERROR;
|
||||
btl_peer_data[i]->ptl_proc = *id;
|
||||
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = btl_peer_data[i];
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"add_procs: nid=%x pid=%x\n", id->phys.nid, id->phys.pid));
|
||||
} else {
|
||||
ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
|
||||
if (proc->phys.nid != id->phys.nid ||
|
||||
proc->phys.pid != id->phys.pid) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: existing peer and modex peer don't match\n",
|
||||
__FILE__, __LINE__);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
btl_peer_data[i] = (mca_btl_base_endpoint_t*) proc;
|
||||
if ((size % sizeof(ptl_process_t)) != 0) {
|
||||
opal_output_verbose(0, ompi_btl_base_framework.framework_output,
|
||||
"btl/portals4: invalid format in modex");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t))));
|
||||
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_num_procs, 1);
|
||||
btl_peer_data[i] = malloc(sizeof(mca_btl_base_endpoint_t));
|
||||
if (NULL == btl_peer_data[i]) return OMPI_ERROR;
|
||||
|
||||
/* If the modex received one id per interface (this is the
|
||||
normal case), store the id of the corresponding
|
||||
interface */
|
||||
if (size / sizeof(ptl_process_t) >= portals4_btl->interface_num)
|
||||
btl_peer_data[i]->ptl_proc = id[portals4_btl->interface_num];
|
||||
else btl_peer_data[i]->ptl_proc = *id;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"add_procs: nid=%x pid=%x for NI %d\n",
|
||||
btl_peer_data[i]->ptl_proc.phys.nid,
|
||||
btl_peer_data[i]->ptl_proc.phys.pid,
|
||||
portals4_btl->interface_num));
|
||||
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, 1);
|
||||
/* and here we can reach */
|
||||
opal_bitmap_set_bit(reachable, i);
|
||||
}
|
||||
if (need_activate && mca_btl_portals4_module.portals_num_procs > 0) {
|
||||
ret = mca_btl_portals4_recv_enable(&mca_btl_portals4_module);
|
||||
if (need_activate && portals4_btl->portals_num_procs > 0) {
|
||||
ret = mca_btl_portals4_recv_enable(portals4_btl);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -153,24 +155,23 @@ mca_btl_portals4_del_procs(struct mca_btl_base_module_t *btl,
|
||||
struct ompi_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **btl_peer_data)
|
||||
{
|
||||
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl;
|
||||
size_t i;
|
||||
|
||||
assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl);
|
||||
|
||||
opal_output_verbose(50, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_del_procs: Removing %d procs (%d)", (int) nprocs,
|
||||
(int) mca_btl_portals4_module.portals_num_procs);
|
||||
(int) portals4_btl->portals_num_procs);
|
||||
|
||||
/* See comment in btl_portals4_endpoint.h about why we look at the
|
||||
portals4 entry in proc_endpoints instead of the peer_data */
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
|
||||
free(procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = NULL;
|
||||
}
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_num_procs, -1);
|
||||
free(btl_peer_data[i]);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, -1);
|
||||
}
|
||||
|
||||
if (0 == portals4_btl->portals_num_procs)
|
||||
mca_btl_portals4_free_module(portals4_btl);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -181,20 +182,19 @@ mca_btl_portals4_alloc(struct mca_btl_base_module_t* btl_base,
|
||||
size_t size,
|
||||
uint32_t flags)
|
||||
{
|
||||
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
|
||||
mca_btl_portals4_frag_t* frag;
|
||||
|
||||
assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl_base);
|
||||
|
||||
if (size <= mca_btl_portals4_module.super.btl_eager_limit) {
|
||||
OMPI_BTL_PORTALS4_FRAG_ALLOC_EAGER(&mca_btl_portals4_module, frag);
|
||||
if (size <= portals4_btl->super.btl_eager_limit) {
|
||||
OMPI_BTL_PORTALS4_FRAG_ALLOC_EAGER(portals4_btl, frag);
|
||||
if (NULL == frag) return NULL;
|
||||
frag->segments[0].base.seg_len = size;
|
||||
} else {
|
||||
OMPI_BTL_PORTALS4_FRAG_ALLOC_MAX(&mca_btl_portals4_module, frag);
|
||||
OMPI_BTL_PORTALS4_FRAG_ALLOC_MAX(portals4_btl, frag);
|
||||
if (NULL == frag) return NULL;
|
||||
frag->segments[0].base.seg_len =
|
||||
size <= mca_btl_portals4_module.super.btl_max_send_size ?
|
||||
size : mca_btl_portals4_module.super.btl_max_send_size ;
|
||||
size <= portals4_btl->super.btl_max_send_size ?
|
||||
size : portals4_btl->super.btl_max_send_size ;
|
||||
}
|
||||
|
||||
frag->md_h = PTL_INVALID_HANDLE;
|
||||
@ -211,28 +211,27 @@ int
|
||||
mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
|
||||
mca_btl_base_descriptor_t* des)
|
||||
{
|
||||
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
|
||||
mca_btl_portals4_frag_t* frag = (mca_btl_portals4_frag_t*) des;
|
||||
|
||||
assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl_base);
|
||||
|
||||
if (BTL_PORTALS4_FRAG_TYPE_EAGER == frag->type) {
|
||||
/* don't ever unlink eager frags */
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_EAGER(&mca_btl_portals4_module.super, frag);
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_EAGER(portals4_btl, frag);
|
||||
|
||||
} else if (BTL_PORTALS4_FRAG_TYPE_MAX == frag->type) {
|
||||
if (frag->me_h != PTL_INVALID_HANDLE) {
|
||||
frag->me_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_MAX(&mca_btl_portals4_module.super, frag);
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_MAX(portals4_btl, frag);
|
||||
|
||||
} else if (BTL_PORTALS4_FRAG_TYPE_USER == frag->type) {
|
||||
if (frag->me_h != PTL_INVALID_HANDLE) {
|
||||
frag->me_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_USER(&mca_btl_portals4_module.super, frag);
|
||||
"mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag);
|
||||
} else {
|
||||
return OMPI_ERR_BAD_PARAM;
|
||||
}
|
||||
@ -258,6 +257,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
size_t* size,
|
||||
uint32_t flags)
|
||||
{
|
||||
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
|
||||
mca_btl_portals4_frag_t* frag;
|
||||
size_t max_data = *size;
|
||||
struct iovec iov;
|
||||
@ -294,20 +294,20 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
ptl_me_t me;
|
||||
|
||||
/* reserve space in the event queue for rdma operations immediately */
|
||||
while (OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, 1) >
|
||||
mca_btl_portals4_module.portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
|
||||
portals4_btl->portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
|
||||
mca_btl_portals4_component_progress();
|
||||
}
|
||||
|
||||
OMPI_BTL_PORTALS4_FRAG_ALLOC_USER(&mca_btl_portals4_module.super, frag);
|
||||
OMPI_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
|
||||
if (NULL == frag){
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_prepare_src: Incrementing portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));
|
||||
"mca_btl_portals4_prepare_src: Incrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
@ -316,7 +316,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
|
||||
frag->segments[0].base.seg_len = max_data;
|
||||
frag->segments[0].base.seg_addr.pval = iov.iov_base;
|
||||
frag->segments[0].key = OPAL_THREAD_ADD64(&(mca_btl_portals4_module.portals_rdma_key), 1);
|
||||
frag->segments[0].key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
|
||||
frag->base.des_src_cnt = 1;
|
||||
|
||||
/* either a put or get. figure out which later */
|
||||
@ -344,8 +344,8 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
BTL_PORTALS4_SOURCE_MASK;
|
||||
me.ignore_bits = 0;
|
||||
|
||||
ret = PtlMEAppend(mca_btl_portals4_module.portals_ni_h,
|
||||
mca_btl_portals4_module.recv_idx,
|
||||
ret = PtlMEAppend(portals4_btl->portals_ni_h,
|
||||
portals4_btl->recv_idx,
|
||||
&me,
|
||||
PTL_PRIORITY_LIST,
|
||||
frag,
|
||||
@ -354,8 +354,8 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEAppend failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_USER(&mca_btl_portals4_module.super, frag);
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
@ -381,27 +381,28 @@ mca_btl_portals4_prepare_dst(struct mca_btl_base_module_t* btl_base,
|
||||
size_t* size,
|
||||
uint32_t flags)
|
||||
{
|
||||
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
|
||||
mca_btl_portals4_frag_t* frag;
|
||||
|
||||
/* reserve space in the event queue for rdma operations immediately */
|
||||
while (OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, 1) >
|
||||
mca_btl_portals4_module.portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
|
||||
portals4_btl->portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (2)\n"));
|
||||
mca_btl_portals4_component_progress();
|
||||
}
|
||||
|
||||
OMPI_BTL_PORTALS4_FRAG_ALLOC_USER(&mca_btl_portals4_module.super, frag);
|
||||
OMPI_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
|
||||
if (NULL == frag) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_prepare_dst: Incrementing portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));
|
||||
"mca_btl_portals4_prepare_dst: Incrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
|
||||
|
||||
frag->segments[0].base.seg_len = *size;
|
||||
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segments[0].base.seg_addr.pval) );
|
||||
frag->segments[0].key = OPAL_THREAD_ADD64(&(mca_btl_portals4_module.portals_rdma_key), 1);
|
||||
frag->segments[0].key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->base.des_dst = &frag->segments[0].base;
|
||||
@ -419,64 +420,95 @@ mca_btl_portals4_prepare_dst(struct mca_btl_base_module_t* btl_base,
|
||||
int
|
||||
mca_btl_portals4_finalize(struct mca_btl_base_module_t *btl)
|
||||
{
|
||||
int ret;
|
||||
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl;
|
||||
|
||||
assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl);
|
||||
mca_btl_portals4_free_module(portals4_btl);
|
||||
|
||||
OBJ_DESTRUCT(&portals4_btl->portals_frag_eager);
|
||||
OBJ_DESTRUCT(&portals4_btl->portals_frag_max);
|
||||
OBJ_DESTRUCT(&portals4_btl->portals_frag_user);
|
||||
OBJ_DESTRUCT(&portals4_btl->portals_recv_blocks);
|
||||
|
||||
free(portals4_btl);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_finalize portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));
|
||||
"mca_btl_portals4_finalize NI %d: OK\n", portals4_btl->interface_num));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
void mca_btl_portals4_free_module(mca_btl_portals4_module_t *portals4_btl)
|
||||
{
|
||||
int ret;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_free_module portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
|
||||
|
||||
/* sanity check */
|
||||
assert(mca_btl_portals4_module.portals_outstanding_ops >= 0);
|
||||
assert(portals4_btl->portals_outstanding_ops >= 0);
|
||||
|
||||
/* finalize all communication */
|
||||
while (mca_btl_portals4_module.portals_outstanding_ops > 0) {
|
||||
while (portals4_btl->portals_outstanding_ops > 0) {
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_finalize portals_outstanding_ops: %d",
|
||||
mca_btl_portals4_module.portals_outstanding_ops));
|
||||
"mca_btl_portals4_free_module portals_outstanding_ops: %d",
|
||||
portals4_btl->portals_outstanding_ops));
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (3)\n"));
|
||||
mca_btl_portals4_component_progress();
|
||||
}
|
||||
|
||||
PtlMEUnlink(mca_btl_portals4_module.long_overflow_me_h);
|
||||
PtlMDRelease(mca_btl_portals4_module.zero_md_h);
|
||||
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
if (NULL != mca_btl_portals4_module.send_md_hs) {
|
||||
if (NULL != portals4_btl->send_md_hs) {
|
||||
int i;
|
||||
int num_mds = mca_btl_portals4_get_num_mds();
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
if (!PtlHandleIsEqual(mca_btl_portals4_module.send_md_hs[i], PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(mca_btl_portals4_module.send_md_hs[i]);
|
||||
if (!PtlHandleIsEqual(portals4_btl->send_md_hs[i], PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(portals4_btl->send_md_hs[i]);
|
||||
portals4_btl->send_md_hs[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
free(mca_btl_portals4_module.send_md_hs);
|
||||
free(portals4_btl->send_md_hs);
|
||||
portals4_btl->send_md_hs = NULL;
|
||||
}
|
||||
#else
|
||||
if (!PtlHandleIsEqual(mca_btl_portals4_module.send_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(mca_btl_portals4_module.send_md_h);
|
||||
if (!PtlHandleIsEqual(portals4_btl->send_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(portals4_btl->send_md_h);
|
||||
portals4_btl->send_md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
#endif
|
||||
if (!PtlHandleIsEqual(portals4_btl->zero_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(portals4_btl->zero_md_h);
|
||||
portals4_btl->zero_md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
PtlPTFree(mca_btl_portals4_module.portals_ni_h, mca_btl_portals4_module.recv_idx);
|
||||
if (!PtlHandleIsEqual(portals4_btl->long_overflow_me_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMEUnlink(portals4_btl->long_overflow_me_h);
|
||||
portals4_btl->long_overflow_me_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
ret = mca_btl_portals4_recv_disable(&mca_btl_portals4_module);
|
||||
if ((ptl_pt_index_t) ~0UL != mca_btl_portals4_module.recv_idx) {
|
||||
PtlPTFree(portals4_btl->portals_ni_h, portals4_btl->recv_idx);
|
||||
portals4_btl->recv_idx= (ptl_pt_index_t) ~0UL;
|
||||
}
|
||||
|
||||
if (PTL_EQ_NONE != portals4_btl->recv_eq_h) {
|
||||
ret = PtlEQFree(portals4_btl->recv_eq_h);
|
||||
if (PTL_OK != ret) OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Error freeing EQ recv: %d", ret));
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlEQFree: recv_eq_h=%d portals4_btl=%p",
|
||||
portals4_btl->recv_eq_h, (void*)portals4_btl));
|
||||
|
||||
portals4_btl->recv_eq_h = PTL_EQ_NONE;
|
||||
}
|
||||
if (!PtlHandleIsEqual(portals4_btl->portals_ni_h, PTL_INVALID_HANDLE)) {
|
||||
ret = PtlNIFini(portals4_btl->portals_ni_h);
|
||||
if (PTL_OK != ret) OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Error returned by PtlNIFini: %d\n", ret));
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlNIFini: portals_ni_h=%d portals4_btl=%p",
|
||||
portals4_btl->portals_ni_h, (void*)portals4_btl));
|
||||
|
||||
portals4_btl->portals_ni_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
ret = mca_btl_portals4_recv_disable(portals4_btl);
|
||||
if (PTL_OK != ret) OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Error freeing recv list: %d", ret));
|
||||
|
||||
/* destroy eqs */
|
||||
ret = PtlEQFree(mca_btl_portals4_module.recv_eq_h);
|
||||
if (PTL_OK != ret) OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Error freeing EQ recv: %d", ret));
|
||||
|
||||
ret = PtlNIFini(mca_btl_portals4_module.portals_ni_h);
|
||||
if (PTL_OK != ret) OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Error returned by PtlNIFini\n"));
|
||||
PtlFini();
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "mca_btl_portals4_finalize OK\n"));
|
||||
|
||||
/* Maybe other objects have to be freed */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -23,9 +23,6 @@
|
||||
#include <portals4.h>
|
||||
#include <btl_portals4_frag.h>
|
||||
|
||||
#define MEMORY_MAX_SIZE ((long int)1<<48)
|
||||
#define EXTENDED_ADDR (0xffff000000000000)
|
||||
|
||||
#include "opal/class/opal_free_list.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
@ -41,8 +38,10 @@ struct mca_btl_portals4_component_t {
|
||||
/* base BTL component */
|
||||
mca_btl_base_component_2_0_0_t super;
|
||||
|
||||
/* output channel for debugging */
|
||||
int portals_verbosity;
|
||||
unsigned int num_btls;
|
||||
unsigned int max_btls; /* Maximum number of accepted Portals4 cards */
|
||||
|
||||
struct mca_btl_portals4_module_t** btls; /* array of available BTL modules */
|
||||
|
||||
/* initial size of free lists */
|
||||
int portals_free_list_init_num;
|
||||
@ -56,6 +55,19 @@ struct mca_btl_portals4_component_t {
|
||||
|
||||
/* do I need a portals ACK? */
|
||||
int portals_need_ack;
|
||||
|
||||
/** Length of the receive event queues */
|
||||
int recv_queue_size;
|
||||
|
||||
/* number outstanding sends and local rdma */
|
||||
int32_t portals_max_outstanding_ops;
|
||||
|
||||
/* incoming send message receive memory descriptors */
|
||||
int portals_recv_mds_num;
|
||||
int portals_recv_mds_size;
|
||||
|
||||
/** Event queue handles table used in PtlEQPoll */
|
||||
ptl_handle_eq_t *eqs_h;
|
||||
};
|
||||
|
||||
typedef struct mca_btl_portals4_component_t mca_btl_portals4_component_t;
|
||||
@ -68,17 +80,14 @@ struct mca_btl_portals4_module_t {
|
||||
know when to do activation / shutdown */
|
||||
int32_t portals_num_procs;
|
||||
|
||||
/* Process_id */
|
||||
ptl_process_t ptl_process_id;
|
||||
/* number of the interface (btl) */
|
||||
uint32_t interface_num;
|
||||
|
||||
/* fragment free lists */
|
||||
ompi_free_list_t portals_frag_eager;
|
||||
ompi_free_list_t portals_frag_max;
|
||||
ompi_free_list_t portals_frag_user;
|
||||
|
||||
/* incoming send message receive memory descriptors */
|
||||
int portals_recv_mds_num;
|
||||
int portals_recv_mds_size;
|
||||
opal_list_t portals_recv_blocks;
|
||||
|
||||
/** Length of the receive event queues */
|
||||
@ -117,10 +126,6 @@ struct mca_btl_portals4_module_t {
|
||||
|
||||
typedef struct mca_btl_portals4_module_t mca_btl_portals4_module_t;
|
||||
|
||||
extern mca_btl_portals4_module_t mca_btl_portals4_module;
|
||||
|
||||
#define REQ_RECV_TABLE_ID 12
|
||||
|
||||
/* match/ignore bit manipulation
|
||||
*
|
||||
* 0123 4567 01234567 01234567 01234567 01234567 01234567 01234567 01234567
|
||||
@ -168,15 +173,15 @@ extern mca_btl_portals4_module_t mca_btl_portals4_module;
|
||||
* memory.
|
||||
*/
|
||||
static inline void
|
||||
ompi_btl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr)
|
||||
ompi_btl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr, mca_btl_portals4_module_t *portals4_btl)
|
||||
{
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
int mask = (1ULL << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1)) - 1;
|
||||
int which = (((uintptr_t) ptr) >> (OMPI_PORTALS4_MAX_MD_SIZE - 1)) & mask;
|
||||
*md_h = mca_btl_portals4_module.send_md_hs[which];
|
||||
*md_h = portals4_btl->send_md_hs[which];
|
||||
*base_ptr = (void*) (which * (1ULL << (OMPI_PORTALS4_MAX_MD_SIZE - 1)));
|
||||
#else
|
||||
*md_h = mca_btl_portals4_module.send_md_h;
|
||||
*md_h = portals4_btl->send_md_h;
|
||||
*base_ptr = 0;
|
||||
#endif
|
||||
}
|
||||
@ -193,6 +198,7 @@ mca_btl_portals4_get_num_mds(void)
|
||||
}
|
||||
|
||||
int mca_btl_portals4_component_progress(void);
|
||||
void mca_btl_portals4_free_module(mca_btl_portals4_module_t *portals4_btl);
|
||||
|
||||
/* BTL interface functions */
|
||||
int mca_btl_portals4_finalize(struct mca_btl_base_module_t* btl_base);
|
||||
|
@ -38,8 +38,6 @@ int mca_btl_portals4_component_progress(void);
|
||||
|
||||
OMPI_MODULE_DECLSPEC extern mca_btl_portals4_component_t mca_btl_portals4_component;
|
||||
|
||||
static unsigned int ompi_btl_portals4_md_size_bit_width;
|
||||
|
||||
mca_btl_portals4_component_t mca_btl_portals4_component = {
|
||||
{
|
||||
/* First, the mca_base_module_t struct containing meta
|
||||
@ -69,6 +67,18 @@ mca_btl_portals4_component_t mca_btl_portals4_component = {
|
||||
static int
|
||||
mca_btl_portals4_component_register(void)
|
||||
{
|
||||
mca_btl_portals4_component.max_btls = 1;
|
||||
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
|
||||
"max_btls",
|
||||
"Maximum number of accepted Portals4 cards",
|
||||
MCA_BASE_VAR_TYPE_UNSIGNED_INT,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_btl_portals4_component.max_btls);
|
||||
|
||||
mca_btl_portals4_component.portals_free_list_init_num = 16;
|
||||
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
|
||||
"free_list_init_num",
|
||||
@ -129,7 +139,7 @@ mca_btl_portals4_component_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&(mca_btl_portals4_component.portals_need_ack));
|
||||
|
||||
mca_btl_portals4_module.recv_queue_size = 1024 * 1024;
|
||||
mca_btl_portals4_component.recv_queue_size = 4 * 1024;
|
||||
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
|
||||
"eq_recv_size",
|
||||
"Size of the receive event queue",
|
||||
@ -139,21 +149,9 @@ mca_btl_portals4_component_register(void)
|
||||
0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&(mca_btl_portals4_module.recv_queue_size));
|
||||
&(mca_btl_portals4_component.recv_queue_size));
|
||||
|
||||
ompi_btl_portals4_md_size_bit_width = 48;
|
||||
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
|
||||
"md_size_bit_width",
|
||||
"Number of bits used to specify the length of an MD to the portals4 library",
|
||||
MCA_BASE_VAR_TYPE_INT,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_btl_portals4_md_size_bit_width);
|
||||
|
||||
mca_btl_portals4_module.portals_max_outstanding_ops = 8 * 1024;
|
||||
mca_btl_portals4_component.portals_max_outstanding_ops = 8 * 1024;
|
||||
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
|
||||
"max_pending_ops",
|
||||
"Maximum number of pending send/rdma frags",
|
||||
@ -163,9 +161,9 @@ mca_btl_portals4_component_register(void)
|
||||
0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&(mca_btl_portals4_module.portals_max_outstanding_ops));
|
||||
&(mca_btl_portals4_component.portals_max_outstanding_ops));
|
||||
|
||||
mca_btl_portals4_module.portals_recv_mds_num = 32;
|
||||
mca_btl_portals4_component.portals_recv_mds_num = 8;
|
||||
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
|
||||
"recv_md_num",
|
||||
"Number of send frag receive descriptors",
|
||||
@ -175,9 +173,9 @@ mca_btl_portals4_component_register(void)
|
||||
0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&(mca_btl_portals4_module.portals_recv_mds_num));
|
||||
&(mca_btl_portals4_component.portals_recv_mds_num));
|
||||
|
||||
mca_btl_portals4_module.portals_recv_mds_size = 64 * 1024;
|
||||
mca_btl_portals4_component.portals_recv_mds_size = 256 * 1024;
|
||||
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
|
||||
"recv_md_size",
|
||||
"Size of send frag receive descriptors",
|
||||
@ -187,14 +185,13 @@ mca_btl_portals4_component_register(void)
|
||||
0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&(mca_btl_portals4_module.portals_recv_mds_size));
|
||||
&(mca_btl_portals4_component.portals_recv_mds_size));
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int
|
||||
mca_btl_portals4_component_open(void)
|
||||
{
|
||||
mca_btl_portals4_component.portals_verbosity = opal_output_get_verbosity(ompi_btl_base_framework.framework_output);
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_btl_base_framework.framework_output, "mca_btl_portals4_component_open\n"));
|
||||
|
||||
/*
|
||||
@ -226,48 +223,6 @@ mca_btl_portals4_component_open(void)
|
||||
mca_btl_portals4_module.send_md_h = PTL_INVALID_HANDLE;
|
||||
#endif
|
||||
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals4_module.portals_frag_eager), ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals4_module.portals_frag_max), ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals4_module.portals_frag_user), ompi_free_list_t);
|
||||
|
||||
/* eager frags */
|
||||
ompi_free_list_init_new(&(mca_btl_portals4_module.portals_frag_eager),
|
||||
sizeof(mca_btl_portals4_frag_eager_t) +
|
||||
mca_btl_portals4_module.super.btl_eager_limit,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_portals4_frag_eager_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_btl_portals4_component.portals_free_list_init_num,
|
||||
mca_btl_portals4_component.portals_free_list_eager_max_num,
|
||||
mca_btl_portals4_component.portals_free_list_inc_num,
|
||||
NULL);
|
||||
|
||||
/* send frags */
|
||||
ompi_free_list_init_new(&(mca_btl_portals4_module.portals_frag_max),
|
||||
sizeof(mca_btl_portals4_frag_max_t) +
|
||||
mca_btl_portals4_module.super.btl_max_send_size,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_portals4_frag_max_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_btl_portals4_component.portals_free_list_init_num,
|
||||
mca_btl_portals4_component.portals_free_list_max_num,
|
||||
mca_btl_portals4_component.portals_free_list_inc_num,
|
||||
NULL);
|
||||
|
||||
/* user frags */
|
||||
ompi_free_list_init_new(&(mca_btl_portals4_module.portals_frag_user),
|
||||
sizeof(mca_btl_portals4_frag_user_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_portals4_frag_user_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_btl_portals4_component.portals_free_list_init_num,
|
||||
mca_btl_portals4_component.portals_free_list_max_num,
|
||||
mca_btl_portals4_component.portals_free_list_inc_num,
|
||||
NULL);
|
||||
|
||||
/* receive block list */
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals4_module.portals_recv_blocks), opal_list_t);
|
||||
|
||||
mca_btl_portals4_module.portals_ni_h = PTL_INVALID_HANDLE;
|
||||
mca_btl_portals4_module.zero_md_h = PTL_INVALID_HANDLE;
|
||||
|
||||
@ -285,15 +240,16 @@ mca_btl_portals4_component_close(void)
|
||||
opal_output_verbose(50, ompi_btl_base_framework.framework_output, "mca_btl_portals4_component_close\n");
|
||||
|
||||
/* release resources */
|
||||
|
||||
/* close debugging stream */
|
||||
opal_output_close(ompi_btl_base_framework.framework_output);
|
||||
ompi_btl_base_framework.framework_output = -1;
|
||||
|
||||
OBJ_DESTRUCT(&mca_btl_portals4_module.portals_frag_eager);
|
||||
OBJ_DESTRUCT(&mca_btl_portals4_module.portals_frag_max);
|
||||
OBJ_DESTRUCT(&mca_btl_portals4_module.portals_frag_user);
|
||||
OBJ_DESTRUCT(&mca_btl_portals4_module.portals_recv_blocks);
|
||||
if (NULL != mca_btl_portals4_component.btls) free(mca_btl_portals4_component.btls);
|
||||
if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h);
|
||||
mca_btl_portals4_component.btls = NULL;
|
||||
mca_btl_portals4_component.eqs_h = NULL;
|
||||
|
||||
PtlFini();
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -302,23 +258,23 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
int ret;
|
||||
ptl_process_t ptl_process_id;
|
||||
mca_btl_portals4_module_t *portals4_btl = NULL;
|
||||
mca_btl_base_module_t **btls = NULL;
|
||||
unsigned int ret, interface;
|
||||
ptl_handle_ni_t *portals4_nis_h = NULL;
|
||||
ptl_process_t *ptl_process_ids = NULL;
|
||||
ptl_md_t md;
|
||||
ptl_me_t me;
|
||||
|
||||
opal_output_verbose(50, ompi_btl_base_framework.framework_output, "mca_btl_portals4_component_init\n");
|
||||
|
||||
mca_btl_base_module_t **btls = malloc(sizeof(mca_btl_base_module_t*));
|
||||
btls[0] = (mca_btl_base_module_t*) &mca_btl_portals4_module;
|
||||
|
||||
if (enable_progress_threads || enable_mpi_threads) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"btl portals4 disabled because threads enabled");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Initialize Portals and create a physical, matching interface */
|
||||
/* Initialize Portals */
|
||||
ret = PtlInit();
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
@ -329,193 +285,294 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlInit OK\n"));
|
||||
|
||||
/*
|
||||
* Initialize a network device
|
||||
* Initialize the network interfaces (try to open the interfaces 0 to (max_btls-1) )
|
||||
*/
|
||||
ret = PtlNIInit(PTL_IFACE_DEFAULT,
|
||||
*num_btls = 0;
|
||||
portals4_nis_h = malloc(mca_btl_portals4_component.max_btls * sizeof(ptl_handle_ni_t));
|
||||
for (interface=0; interface<mca_btl_portals4_component.max_btls; interface++) {
|
||||
|
||||
ret = PtlNIInit((1 == mca_btl_portals4_component.max_btls) ? PTL_IFACE_DEFAULT : interface,
|
||||
PTL_NI_PHYSICAL | PTL_NI_MATCHING,
|
||||
PTL_PID_ANY, /* let library assign our pid */
|
||||
NULL, /* no desired limits */
|
||||
NULL, /* actual limits */
|
||||
&mca_btl_portals4_module.portals_ni_h /* our interface handle */
|
||||
&portals4_nis_h[*num_btls] /* our interface handle */
|
||||
);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlNIInit failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(90, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlNIInit failed for NI %d: %d\n", __FILE__, __LINE__, interface, ret);
|
||||
}
|
||||
else {
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlNIInit OK for NI %d\n", *num_btls));
|
||||
(*num_btls)++;
|
||||
}
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlNIInit OK\n"));
|
||||
if (0 == *num_btls) goto error;
|
||||
|
||||
/* Publish our NID/PID in the modex */
|
||||
ret = PtlGetId(mca_btl_portals4_module.portals_ni_h ,&ptl_process_id);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlGetId failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
/*
|
||||
* Configure the different network interfaces and the associated btl modules
|
||||
*/
|
||||
mca_btl_portals4_component.num_btls = *num_btls;
|
||||
mca_btl_portals4_component.btls = malloc(mca_btl_portals4_component.num_btls * sizeof(mca_btl_portals4_module_t*) );
|
||||
mca_btl_portals4_component.eqs_h = malloc(mca_btl_portals4_component.num_btls * sizeof(ptl_handle_eq_t));
|
||||
ptl_process_ids = malloc(mca_btl_portals4_component.num_btls * sizeof(ptl_process_t) );
|
||||
|
||||
for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
|
||||
mca_btl_portals4_component.btls[interface] = NULL;
|
||||
mca_btl_portals4_component.eqs_h[interface] = PTL_EQ_NONE;
|
||||
}
|
||||
mca_btl_portals4_module.ptl_process_id = ptl_process_id;
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PtlGetId nid=%x pid=%x\n", ptl_process_id.phys.nid, ptl_process_id.phys.pid));
|
||||
for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
|
||||
portals4_btl = malloc(sizeof(mca_btl_portals4_module_t));
|
||||
mca_btl_portals4_component.btls[interface] = portals4_btl;
|
||||
|
||||
ret = ompi_modex_send(&mca_btl_portals4_component.super.btl_version,
|
||||
&ptl_process_id, sizeof(ptl_process_t));
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: ompi_modex_send failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
/* Copy the default module */
|
||||
memcpy(portals4_btl, &mca_btl_portals4_module, sizeof(mca_btl_portals4_module_t));
|
||||
|
||||
/* create event queue */
|
||||
ret = PtlEQAlloc(mca_btl_portals4_module.portals_ni_h,
|
||||
mca_btl_portals4_module.recv_queue_size,
|
||||
&mca_btl_portals4_module.recv_eq_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlEQAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlEQAlloc (recv_queue) OK\n"));
|
||||
portals4_btl->interface_num = interface;
|
||||
portals4_btl->portals_ni_h = portals4_nis_h[interface];
|
||||
portals4_btl->portals_max_outstanding_ops = mca_btl_portals4_component.portals_max_outstanding_ops;
|
||||
|
||||
/* Create recv_idx portal table entry */
|
||||
ret = PtlPTAlloc(mca_btl_portals4_module.portals_ni_h,
|
||||
PTL_PT_ONLY_USE_ONCE |
|
||||
PTL_PT_ONLY_TRUNCATE,
|
||||
mca_btl_portals4_module.recv_eq_h,
|
||||
REQ_RECV_TABLE_ID,
|
||||
&mca_btl_portals4_module.recv_idx);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlPTAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PtlPTAlloc (recv_idx) OK recv_idx=%d\n", mca_btl_portals4_module.recv_idx));
|
||||
OBJ_CONSTRUCT(&(portals4_btl->portals_frag_eager), ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&(portals4_btl->portals_frag_max), ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&(portals4_btl->portals_frag_user), ompi_free_list_t);
|
||||
|
||||
/* bind zero-length md for sending acks */
|
||||
md.start = NULL;
|
||||
md.length = 0;
|
||||
md.options = 0;
|
||||
md.eq_handle = PTL_EQ_NONE;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
/* eager frags */
|
||||
ompi_free_list_init_new(&(portals4_btl->portals_frag_eager),
|
||||
sizeof(mca_btl_portals4_frag_eager_t) +
|
||||
portals4_btl->super.btl_eager_limit,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_portals4_frag_eager_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_btl_portals4_component.portals_free_list_init_num,
|
||||
mca_btl_portals4_component.portals_free_list_eager_max_num,
|
||||
mca_btl_portals4_component.portals_free_list_inc_num,
|
||||
NULL);
|
||||
|
||||
ret = PtlMDBind(mca_btl_portals4_module.portals_ni_h,
|
||||
&md,
|
||||
&mca_btl_portals4_module.zero_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMDBind (zero-length md) OK\n"));
|
||||
/* send frags */
|
||||
ompi_free_list_init_new(&(portals4_btl->portals_frag_max),
|
||||
sizeof(mca_btl_portals4_frag_max_t) +
|
||||
portals4_btl->super.btl_max_send_size,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_portals4_frag_max_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_btl_portals4_component.portals_free_list_init_num,
|
||||
mca_btl_portals4_component.portals_free_list_max_num,
|
||||
mca_btl_portals4_component.portals_free_list_inc_num,
|
||||
NULL);
|
||||
|
||||
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
|
||||
to have a single MD across all of memory */
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = mca_btl_portals4_get_num_mds();
|
||||
ptl_size_t size = (1ULL << OMPI_PORTALS4_MAX_MD_SIZE) - 1;
|
||||
ptl_size_t offset_unit = (1ULL << OMPI_PORTALS4_MAX_MD_SIZE) / 2;
|
||||
/* user frags */
|
||||
ompi_free_list_init_new(&(portals4_btl->portals_frag_user),
|
||||
sizeof(mca_btl_portals4_frag_user_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_portals4_frag_user_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_btl_portals4_component.portals_free_list_init_num,
|
||||
mca_btl_portals4_component.portals_free_list_max_num,
|
||||
mca_btl_portals4_component.portals_free_list_inc_num,
|
||||
NULL);
|
||||
|
||||
mca_btl_portals4_module.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == mca_btl_portals4_module.send_md_hs) {
|
||||
/* receive block list */
|
||||
OBJ_CONSTRUCT(&(portals4_btl->portals_recv_blocks), opal_list_t);
|
||||
|
||||
/* create event queue */
|
||||
ret = PtlEQAlloc(portals4_btl->portals_ni_h,
|
||||
mca_btl_portals4_component.recv_queue_size,
|
||||
&portals4_btl->recv_eq_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: Error allocating MD array",
|
||||
__FILE__, __LINE__);
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
"%s:%d: PtlEQAlloc failed for NI %d: %d\n",
|
||||
__FILE__, __LINE__, interface, ret);
|
||||
goto error;
|
||||
}
|
||||
mca_btl_portals4_component.eqs_h[interface] = portals4_btl->recv_eq_h;
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PtlEQAlloc (recv_eq=%d) OK for NI %d\n", portals4_btl->recv_eq_h, interface));
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
mca_btl_portals4_module.send_md_hs[i] = PTL_INVALID_HANDLE;
|
||||
/* Create recv_idx portal table entry */
|
||||
ret = PtlPTAlloc(portals4_btl->portals_ni_h,
|
||||
PTL_PT_ONLY_USE_ONCE |
|
||||
PTL_PT_ONLY_TRUNCATE,
|
||||
portals4_btl->recv_eq_h,
|
||||
PTL_PT_ANY,
|
||||
&portals4_btl->recv_idx);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlPTAlloc failed for NI %d: %d\n",
|
||||
__FILE__, __LINE__, interface, ret);
|
||||
goto error;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PtlPTAlloc (recv_idx) OK for NI %d recv_idx=%d\n", interface, portals4_btl->recv_idx));
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
md.start = (char*) (offset_unit * i);
|
||||
md.length = (i - 1 == num_mds) ? size / 2 : size;
|
||||
md.options = 0;
|
||||
md.eq_handle = mca_btl_portals4_module.recv_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
/* bind zero-length md for sending acks */
|
||||
md.start = NULL;
|
||||
md.length = 0;
|
||||
md.options = 0;
|
||||
md.eq_handle = PTL_EQ_NONE;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
opal_output_verbose(50, ompi_btl_base_framework.framework_output,
|
||||
ret = PtlMDBind(portals4_btl->portals_ni_h,
|
||||
&md,
|
||||
&portals4_btl->zero_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed for NI %d: %d\n",
|
||||
__FILE__, __LINE__, interface, ret);
|
||||
goto error;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PtlMDBind (zero-length md=%d) OK for NI %d\n", portals4_btl->zero_md_h, interface));
|
||||
|
||||
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
|
||||
to have a single MD across all of memory */
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = mca_btl_portals4_get_num_mds();
|
||||
ptl_size_t size = (1ULL << OMPI_PORTALS4_MAX_MD_SIZE) - 1;
|
||||
ptl_size_t offset_unit = (1ULL << OMPI_PORTALS4_MAX_MD_SIZE) / 2;
|
||||
|
||||
portals4_btl->send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == portals4_btl->send_md_hs) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: Error allocating MD array",
|
||||
__FILE__, __LINE__);
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
portals4_btl->send_md_hs[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
md.start = (char*) (offset_unit * i);
|
||||
md.length = (i - 1 == num_mds) ? size / 2 : size;
|
||||
md.options = 0;
|
||||
md.eq_handle = portals4_btl->recv_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
opal_output_verbose(50, ompi_btl_base_framework.framework_output,
|
||||
"Binding md from %p of length %lx",
|
||||
md.start, md.length);
|
||||
|
||||
ret = PtlMDBind(mca_btl_portals4_module.portals_ni_h,
|
||||
ret = PtlMDBind(portals4_btl->portals_ni_h,
|
||||
&md,
|
||||
&mca_btl_portals4_module.send_md_hs[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
&portals4_btl->send_md_hs[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed for NI %d: %d\n",
|
||||
__FILE__, __LINE__, interface, ret);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMDBind (all memory) OK for NI %d\n", interface));
|
||||
}
|
||||
}
|
||||
#else
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = mca_btl_portals4_module.recv_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = portals4_btl->recv_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(mca_btl_portals4_module.portals_ni_h,
|
||||
ret = PtlMDBind(portals4_btl->portals_ni_h,
|
||||
&md,
|
||||
&mca_btl_portals4_module.send_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
&portals4_btl->send_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed for NI %d: %d\n",
|
||||
__FILE__, __LINE__, interface, ret);
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle long overflows */
|
||||
me.start = NULL;
|
||||
me.length = 0;
|
||||
me.ct_handle = PTL_CT_NONE;
|
||||
me.min_free = 0;
|
||||
me.uid = PTL_UID_ANY;
|
||||
me.options = PTL_ME_OP_PUT |
|
||||
PTL_ME_EVENT_LINK_DISABLE |
|
||||
PTL_ME_EVENT_COMM_DISABLE |
|
||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
me.match_bits = BTL_PORTALS4_LONG_MSG;
|
||||
me.ignore_bits = BTL_PORTALS4_CONTEXT_MASK |
|
||||
BTL_PORTALS4_SOURCE_MASK |
|
||||
BTL_PORTALS4_TAG_MASK;
|
||||
ret = PtlMEAppend(mca_btl_portals4_module.portals_ni_h,
|
||||
mca_btl_portals4_module.recv_idx,
|
||||
/* Handle long overflows */
|
||||
me.start = NULL;
|
||||
me.length = 0;
|
||||
me.ct_handle = PTL_CT_NONE;
|
||||
me.min_free = 0;
|
||||
me.uid = PTL_UID_ANY;
|
||||
me.options = PTL_ME_OP_PUT |
|
||||
PTL_ME_EVENT_LINK_DISABLE |
|
||||
PTL_ME_EVENT_COMM_DISABLE |
|
||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
me.match_bits = BTL_PORTALS4_LONG_MSG;
|
||||
me.ignore_bits = BTL_PORTALS4_CONTEXT_MASK |
|
||||
BTL_PORTALS4_SOURCE_MASK |
|
||||
BTL_PORTALS4_TAG_MASK;
|
||||
ret = PtlMEAppend(portals4_btl->portals_ni_h,
|
||||
portals4_btl->recv_idx,
|
||||
&me,
|
||||
PTL_OVERFLOW_LIST,
|
||||
NULL,
|
||||
&mca_btl_portals4_module.long_overflow_me_h);
|
||||
if (PTL_OK != ret) {
|
||||
&portals4_btl->long_overflow_me_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEAppend failed for NI %d: %d\n",
|
||||
__FILE__, __LINE__, interface, ret);
|
||||
goto error;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMEAppend (overflow list) OK for NI %d\n", interface));
|
||||
}
|
||||
free(portals4_nis_h);
|
||||
portals4_nis_h = NULL;
|
||||
|
||||
/* Publish our NID(s)/PID(s) in the modex */
|
||||
for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
|
||||
portals4_btl = mca_btl_portals4_component.btls[interface];
|
||||
ret = PtlGetId(portals4_btl->portals_ni_h ,&ptl_process_ids[interface]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlGetId for NI %d failed: %d\n",
|
||||
__FILE__, __LINE__, interface, ret);
|
||||
goto error;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PtlGetId NI number %d: ni_h=%d PtlGetId : nid=%x pid=%x\n",
|
||||
interface, portals4_btl->portals_ni_h,
|
||||
ptl_process_ids[interface].phys.nid, ptl_process_ids[interface].phys.pid));
|
||||
}
|
||||
ret = ompi_modex_send(&mca_btl_portals4_component.super.btl_version,
|
||||
ptl_process_ids, mca_btl_portals4_component.num_btls * sizeof(ptl_process_t));
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEAppend failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
"%s:%d: ompi_modex_send failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMEAppend (overflow list) OK\n"));
|
||||
free(ptl_process_ids);
|
||||
ptl_process_ids = NULL;
|
||||
|
||||
*num_btls = 1;
|
||||
btls = malloc(mca_btl_portals4_component.num_btls * sizeof(mca_btl_portals4_module_t*) );
|
||||
memcpy(btls , mca_btl_portals4_component.btls,
|
||||
mca_btl_portals4_component.num_btls*sizeof(mca_btl_portals4_module_t*) );
|
||||
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output, "btl portals4 module has been initialized");
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output, "The btl portals4 component has been initialized and uses %d NI(s)",
|
||||
mca_btl_portals4_component.num_btls);
|
||||
|
||||
return btls;
|
||||
|
||||
error:
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output, "Error in mca_btl_portals4_component_init\n");
|
||||
|
||||
free(btls);
|
||||
/* Free also other portals4 resources */
|
||||
if (*num_btls) {
|
||||
if (NULL != portals4_nis_h) free(portals4_nis_h);
|
||||
if (NULL != ptl_process_ids) free(ptl_process_ids);
|
||||
|
||||
for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
|
||||
portals4_btl = mca_btl_portals4_component.btls[interface];
|
||||
if (NULL != portals4_btl) mca_btl_portals4_free_module(portals4_btl);
|
||||
}
|
||||
mca_btl_portals4_component.num_btls = 0;
|
||||
*num_btls = 0;
|
||||
if (NULL != mca_btl_portals4_component.btls) free(mca_btl_portals4_component.btls);
|
||||
if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h);
|
||||
mca_btl_portals4_component.btls = NULL;
|
||||
mca_btl_portals4_component.eqs_h = NULL;
|
||||
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -581,21 +638,24 @@ mca_btl_portals4_get_error(int ptl_error)
|
||||
int
|
||||
mca_btl_portals4_component_progress(void)
|
||||
{
|
||||
mca_btl_portals4_module_t *portals4_btl;
|
||||
int num_progressed = 0;
|
||||
int ret, btl_ownership;
|
||||
mca_btl_portals4_frag_t *frag = NULL;
|
||||
mca_btl_base_tag_t tag;
|
||||
static ptl_event_t ev;
|
||||
unsigned int which;
|
||||
mca_btl_active_message_callback_t* reg;
|
||||
mca_btl_base_segment_t seg[2];
|
||||
|
||||
if (0 == mca_btl_portals4_module.portals_num_procs) return 0;
|
||||
|
||||
while (true) {
|
||||
ret = PtlEQGet(mca_btl_portals4_module.recv_eq_h, &ev);
|
||||
ret = PtlEQPoll(mca_btl_portals4_component.eqs_h, mca_btl_portals4_component.num_btls, 0, &ev, &which);
|
||||
|
||||
if (PTL_OK == ret) {
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlEQGet Event received: %d (%d)\n", ev.type, ev.ni_fail_type));
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlEQPoll Event received: %d (%d) on NI %d\n",
|
||||
ev.type, ev.ni_fail_type, which));
|
||||
num_progressed++;
|
||||
portals4_btl = mca_btl_portals4_component.btls[which];
|
||||
|
||||
switch (ev.type) {
|
||||
|
||||
@ -609,19 +669,19 @@ mca_btl_portals4_component_progress(void)
|
||||
if( MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ){
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_SEND: Direct call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
|
||||
frag->base.des_cbfunc(&mca_btl_portals4_module.super,
|
||||
frag->base.des_cbfunc(&portals4_btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_SUCCESS);
|
||||
}
|
||||
if (btl_ownership) {
|
||||
mca_btl_portals4_free(&mca_btl_portals4_module.super, &frag->base);
|
||||
mca_btl_portals4_free(&portals4_btl->super, &frag->base);
|
||||
}
|
||||
if (0 != frag->size) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n",
|
||||
mca_btl_portals4_module.portals_outstanding_ops));
|
||||
portals4_btl->portals_outstanding_ops));
|
||||
}
|
||||
}
|
||||
|
||||
@ -641,19 +701,19 @@ mca_btl_portals4_component_progress(void)
|
||||
if (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ) {
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_ACK: Call to des_cbfunc %lx\n", (uint64_t)frag->base.des_cbfunc));
|
||||
frag->base.des_cbfunc(&mca_btl_portals4_module.super,
|
||||
frag->base.des_cbfunc(&portals4_btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_SUCCESS);
|
||||
}
|
||||
if (btl_ownership) {
|
||||
mca_btl_portals4_free(&mca_btl_portals4_module.super, &frag->base);
|
||||
mca_btl_portals4_free(&portals4_btl->super, &frag->base);
|
||||
}
|
||||
|
||||
if (0 != frag->size) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", mca_btl_portals4_module.portals_outstanding_ops));
|
||||
"PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops));
|
||||
}
|
||||
|
||||
goto done;
|
||||
@ -673,7 +733,7 @@ mca_btl_portals4_component_progress(void)
|
||||
reg = mca_btl_base_active_message_trigger + tag;
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_PUT: tag=%x frag=%p cbfunc: %lx\n", tag, (void*)frag, (uint64_t)reg->cbfunc));
|
||||
reg->cbfunc(&mca_btl_portals4_module.super, tag, &(frag->base), reg->cbdata);
|
||||
reg->cbfunc(&portals4_btl->super, tag, &(frag->base), reg->cbdata);
|
||||
|
||||
goto done;
|
||||
break;
|
||||
@ -690,7 +750,7 @@ mca_btl_portals4_component_progress(void)
|
||||
|
||||
case PTL_EVENT_AUTO_UNLINK:
|
||||
/* */
|
||||
/* This activation should be done for PTL_EVENT_AUTO_FREE */
|
||||
/* The Priority List is used, so PTL_EVENT_AUTO_FREE will never be received. So, we have to reactivate the block here */
|
||||
mca_btl_portals4_activate_block(ev.user_ptr);
|
||||
goto done;
|
||||
break;
|
||||
@ -719,7 +779,7 @@ mca_btl_portals4_component_progress(void)
|
||||
0,
|
||||
frag->length,
|
||||
frag->peer_proc,
|
||||
mca_btl_portals4_module.recv_idx,
|
||||
portals4_btl->recv_idx,
|
||||
frag->match_bits, /* match bits */
|
||||
0,
|
||||
frag);
|
||||
@ -734,22 +794,22 @@ mca_btl_portals4_component_progress(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"Re-issued PtlGet length=%ld recv_idx=%d pid=%x match_bits=%lx\n",
|
||||
frag->length, mca_btl_portals4_module.recv_idx, frag->peer_proc.phys.pid, frag->match_bits));
|
||||
frag->length, portals4_btl->recv_idx, frag->peer_proc.phys.pid, frag->match_bits));
|
||||
}
|
||||
else {
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_REPLY: Call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
|
||||
frag->base.des_cbfunc(&mca_btl_portals4_module.super,
|
||||
frag->base.des_cbfunc(&portals4_btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_SUCCESS);
|
||||
PtlMDRelease(frag->md_h);
|
||||
frag->md_h = PTL_INVALID_HANDLE;
|
||||
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_USER(&mca_btl_portals4_module.super, frag);
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
OMPI_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));
|
||||
"PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
|
||||
goto done;
|
||||
}
|
||||
break;
|
||||
|
@ -29,15 +29,6 @@ BEGIN_C_DECLS
|
||||
* An instance of mca_btl_base_endpoint_t is associated w/ each process
|
||||
* and BTL pair at startup. However, connections to the endpoint
|
||||
* are established dynamically on an as-needed basis:
|
||||
*
|
||||
* The MTL, OSC, and COLL components expect the ptl_process_t to be
|
||||
* hanging off the PORTALS4 tag in the proc_endpoints. That's not
|
||||
* entirely convenient for the BTL, since we get an endpoint, not an
|
||||
* ompi_proc_t. So we store the ptl_process_t in both places. Since
|
||||
* the btl_base_endpoint_t is just a ptl_process_t, we use the same
|
||||
* storage for both. During tear-down, it's entirely possible that
|
||||
* the MTL is going to free the PORTALS4 memory, so we need to be
|
||||
* careful during del_procs.
|
||||
*/
|
||||
struct mca_btl_base_endpoint_t {
|
||||
ptl_process_t ptl_proc;
|
||||
|
@ -49,8 +49,8 @@ mca_btl_portals4_frag_eager_constructor(mca_btl_portals4_frag_t* frag)
|
||||
static void
|
||||
mca_btl_portals4_frag_eager_destructor(mca_btl_portals4_frag_t* frag)
|
||||
{
|
||||
if (PTL_INVALID_HANDLE == frag->me_h) {
|
||||
PtlMDRelease(frag->me_h);
|
||||
if (PTL_INVALID_HANDLE != frag->me_h) {
|
||||
PtlMEUnlink(frag->me_h);
|
||||
frag->me_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ mca_btl_portals4_put(struct mca_btl_base_module_t* btl_base,
|
||||
struct mca_btl_base_endpoint_t* btl_peer,
|
||||
struct mca_btl_base_descriptor_t* descriptor)
|
||||
{
|
||||
opal_output(0, "mca_btl_portals4_put not implemented\n");
|
||||
opal_output(ompi_btl_base_framework.framework_output, "mca_btl_portals4_put not implemented\n");
|
||||
|
||||
MPI_Abort(MPI_COMM_WORLD, 10);
|
||||
return OMPI_SUCCESS;
|
||||
@ -38,6 +38,7 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
|
||||
struct mca_btl_base_endpoint_t* btl_peer,
|
||||
struct mca_btl_base_descriptor_t* descriptor)
|
||||
{
|
||||
mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
|
||||
mca_btl_portals4_segment_t *src_seg = (mca_btl_portals4_segment_t *) descriptor->des_src;
|
||||
mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor;
|
||||
ptl_md_t md;
|
||||
@ -46,8 +47,6 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_get frag=%p src_seg=%p frag->md_h=%d\n", (void *)frag, (void *)src_seg, frag->md_h));
|
||||
|
||||
assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl_base);
|
||||
|
||||
frag->endpoint = btl_peer;
|
||||
frag->hdr.tag = MCA_BTL_TAG_MAX;
|
||||
|
||||
@ -55,10 +54,10 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
|
||||
md.start = (void *)frag->segments[0].base.seg_addr.pval;
|
||||
md.length = frag->segments[0].base.seg_len;
|
||||
md.options = 0;
|
||||
md.eq_handle = mca_btl_portals4_module.recv_eq_h;
|
||||
md.eq_handle = portals4_btl->recv_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(mca_btl_portals4_module.portals_ni_h,
|
||||
ret = PtlMDBind(portals4_btl->portals_ni_h,
|
||||
&md,
|
||||
&frag->md_h);
|
||||
|
||||
@ -76,7 +75,7 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
|
||||
0,
|
||||
md.length,
|
||||
btl_peer->ptl_proc,
|
||||
mca_btl_portals4_module.recv_idx,
|
||||
portals4_btl->recv_idx,
|
||||
frag->match_bits, /* match bits */
|
||||
0,
|
||||
frag);
|
||||
|
@ -36,7 +36,7 @@ mca_btl_portals4_recv_enable(mca_btl_portals4_module_t *btl)
|
||||
int i;
|
||||
|
||||
/* create the recv blocks */
|
||||
for (i = 0 ; i < btl->portals_recv_mds_num ; ++i) {
|
||||
for (i = 0 ; i < mca_btl_portals4_component.portals_recv_mds_num ; ++i) {
|
||||
mca_btl_portals4_recv_block_t *block =
|
||||
mca_btl_portals4_recv_block_init(btl);
|
||||
if (NULL == block) {
|
||||
@ -74,12 +74,11 @@ mca_btl_portals4_recv_block_init(mca_btl_portals4_module_t *btl)
|
||||
|
||||
block = OBJ_NEW(mca_btl_portals4_recv_block_t);
|
||||
block->btl = btl;
|
||||
block->length = btl->portals_recv_mds_size;
|
||||
block->length = mca_btl_portals4_component.portals_recv_mds_size;
|
||||
block->start = malloc(block->length);
|
||||
if (block->start == NULL) return NULL;
|
||||
|
||||
block->me_h = PTL_INVALID_HANDLE;
|
||||
block->md_h = PTL_INVALID_HANDLE;
|
||||
|
||||
block->full = false;
|
||||
block->pending = 0;
|
||||
|
@ -29,7 +29,6 @@ struct mca_btl_portals4_recv_block_t {
|
||||
void *start;
|
||||
size_t length;
|
||||
ptl_handle_me_t me_h;
|
||||
ptl_handle_md_t md_h;
|
||||
|
||||
volatile bool full;
|
||||
volatile int32_t pending;
|
||||
@ -70,6 +69,7 @@ mca_btl_portals4_activate_block(mca_btl_portals4_recv_block_t *block)
|
||||
ptl_me_t me;
|
||||
ptl_process_t remote_proc;
|
||||
ptl_match_bits_t match_bits, ignore_bits;
|
||||
mca_btl_portals4_module_t *btl = block->btl;
|
||||
|
||||
if (NULL == block->start) return OMPI_ERROR;
|
||||
|
||||
@ -79,7 +79,7 @@ mca_btl_portals4_activate_block(mca_btl_portals4_recv_block_t *block)
|
||||
me.start = block->start;
|
||||
me.length = block->length;
|
||||
me.ct_handle = PTL_CT_NONE;
|
||||
me.min_free = mca_btl_portals4_module.super.btl_eager_limit;
|
||||
me.min_free = btl->super.btl_eager_limit;
|
||||
me.uid = PTL_UID_ANY;
|
||||
me.options =
|
||||
PTL_ME_OP_PUT |
|
||||
@ -98,19 +98,20 @@ mca_btl_portals4_activate_block(mca_btl_portals4_recv_block_t *block)
|
||||
block->full = false;
|
||||
opal_atomic_mb();
|
||||
|
||||
ret = PtlMEAppend(mca_btl_portals4_module.portals_ni_h,
|
||||
mca_btl_portals4_module.recv_idx,
|
||||
ret = PtlMEAppend(btl->portals_ni_h,
|
||||
btl->recv_idx,
|
||||
&me,
|
||||
PTL_PRIORITY_LIST,
|
||||
block,
|
||||
&block->me_h);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEAppend failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
"%s:%d: PtlMEAppend failed on NI %d: %d",
|
||||
__FILE__, __LINE__, btl->interface_num, ret);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMEAppend (recv) block=%p me_h=%d start=%p len=%x\n", (void *)block, block->me_h, block->start, (unsigned int) block->length));
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMEAppend (recv) block=%p me_h=%d start=%p len=%x NI=%d\n",
|
||||
(void *)block, block->me_h, block->start, (unsigned int) block->length, btl->interface_num));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
|
||||
struct mca_btl_base_descriptor_t* descriptor,
|
||||
mca_btl_base_tag_t tag)
|
||||
{
|
||||
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
|
||||
mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor;
|
||||
ptl_match_bits_t match_bits, msglen_type;
|
||||
ptl_size_t put_length;
|
||||
@ -43,45 +44,48 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
|
||||
frag->hdr.tag = tag;
|
||||
|
||||
put_length = frag->segments[0].base.seg_len;
|
||||
if (put_length > mca_btl_portals4_module.super.btl_eager_limit)
|
||||
if (put_length > portals4_btl->super.btl_eager_limit)
|
||||
msglen_type = BTL_PORTALS4_LONG_MSG;
|
||||
else msglen_type = BTL_PORTALS4_SHORT_MSG;
|
||||
|
||||
BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type);
|
||||
|
||||
ompi_btl_portals4_get_md(frag->segments[0].base.seg_addr.pval, &md_h, &base);
|
||||
ompi_btl_portals4_get_md(frag->segments[0].base.seg_addr.pval, &md_h, &base, portals4_btl);
|
||||
offset = (ptl_size_t) ((char*) frag->segments[0].base.seg_addr.pval - (char*) base);
|
||||
|
||||
/* reserve space in the event queue for rdma operations immediately */
|
||||
while (OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, 1) >
|
||||
mca_btl_portals4_module.portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
|
||||
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
|
||||
portals4_btl->portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"Call to mca_btl_portals4_component_progress (4)\n"));
|
||||
mca_btl_portals4_component_progress();
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_send: Incrementing portals_outstanding_ops=%d\n",
|
||||
mca_btl_portals4_module.portals_outstanding_ops));
|
||||
portals4_btl->portals_outstanding_ops));
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_btl_base_framework.framework_output,
|
||||
"PtlPut frag=%p pid=%x tag=%x len=%ld match_bits=%lx\n",
|
||||
(void*)frag, endpoint->ptl_proc.phys.pid, tag,
|
||||
put_length, (uint64_t)match_bits));
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) offset,
|
||||
put_length, /* fragment length */
|
||||
(mca_btl_portals4_component.portals_need_ack ? PTL_ACK_REQ : PTL_NO_ACK_REQ),
|
||||
endpoint->ptl_proc,
|
||||
mca_btl_portals4_module.recv_idx,
|
||||
portals4_btl->recv_idx,
|
||||
match_bits, /* match bits */
|
||||
0, /* remote offset - not used */
|
||||
(void *) frag, /* user ptr */
|
||||
tag); /* hdr_data: tag */
|
||||
if (ret != PTL_OK) {
|
||||
opal_output(0, "mca_btl_portals4_send: PtlPut failed with error %d", ret);
|
||||
opal_output(ompi_btl_base_framework.framework_output, "mca_btl_portals4_send: PtlPut failed with error %d", ret);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlPut frag=%p pid=%x tag=%x addr=%p len=%ld match_bits=%lx\n",
|
||||
(void*)frag, endpoint->ptl_proc.phys.pid, tag, (void *)offset, put_length, (uint64_t)match_bits));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -98,7 +102,7 @@ int mca_btl_portals4_sendi(struct mca_btl_base_module_t* btl_base,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t** des)
|
||||
{
|
||||
opal_output(0, "mca_btl_portals_sendi is not implemented");
|
||||
opal_output(ompi_btl_base_framework.framework_output, "mca_btl_portals_sendi is not implemented");
|
||||
abort();
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -19,16 +19,6 @@
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_ompi_btl_portals4_POST_CONFIG(will_build)
|
||||
# ----------------------------------------
|
||||
# Unlike most of the BTLs, we need to register an endpoint tag so that
|
||||
# we can get our endpoint information from the same place as all the
|
||||
# other Portals components (one-sided, mtl, coll, etc.). See comment
|
||||
# in btl_portals4_endpoint.h for how the pieces fit together.
|
||||
AC_DEFUN([MCA_ompi_btl_portals4_POST_CONFIG], [
|
||||
AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([PORTALS4])])
|
||||
])dnl
|
||||
|
||||
# MCA_btl_portals4_CONFIG(action-if-can-compile,
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user