Fix a problem in releasing fragments during the GET_END event: the code didn't check whether
the portals btl has ownership and therefore didn't free the frag as it should. This caused leakage and hangs in MPI_Finalize. Also added a bit more debugging output. This commit was SVN r17900.
Этот коммит содержится в:
родитель
c2fd5dd416
Коммит
dcac824f59
@ -1,4 +1,4 @@
|
||||
/*
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -362,10 +363,10 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
|
||||
/* either a put or get. figure out which later */
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma src posted for frag 0x%lx, callback 0x%lx, bits %" PRIu64,
|
||||
"rdma src posted for frag 0x%lx, callback 0x%lx, bits %"PRIu64", flags say %d" ,
|
||||
(unsigned long) frag,
|
||||
(unsigned long) frag->base.des_cbfunc,
|
||||
frag->segments[0].seg_key.key64));
|
||||
frag->segments[0].seg_key.key64, flags));
|
||||
|
||||
/* create a match entry */
|
||||
ret = PtlMEAttach(mca_btl_portals_module.portals_ni_h,
|
||||
@ -457,10 +458,11 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl_base,
|
||||
frag->base.des_flags = flags;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma dest posted for frag 0x%lx, callback 0x%lx, bits %" PRIu64,
|
||||
"rdma dest posted for frag 0x%lx, callback 0x%lx, bits %" PRIu64 " flags %d",
|
||||
(unsigned long) frag,
|
||||
(unsigned long) frag->base.des_cbfunc,
|
||||
frag->segments[0].seg_key.key64));
|
||||
frag->segments[0].seg_key.key64,
|
||||
flags));
|
||||
|
||||
/* create a match entry */
|
||||
ret = PtlMEAttach(mca_btl_portals_module.portals_ni_h,
|
||||
@ -511,12 +513,22 @@ mca_btl_portals_finalize(struct mca_btl_base_module_t *btl_base)
|
||||
int ret;
|
||||
|
||||
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"in mca_btl_portals_finalize"));
|
||||
|
||||
/* sanity check */
|
||||
assert(mca_btl_portals_module.portals_outstanding_ops >= 0);
|
||||
|
||||
/* finalize all communication */
|
||||
while (mca_btl_portals_module.portals_outstanding_ops > 0) {
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"portals_outstanding_ops: %d",
|
||||
mca_btl_portals_module.portals_outstanding_ops));
|
||||
|
||||
mca_btl_portals_component_progress();
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (mca_btl_portals_module.portals_num_procs != 0) {
|
||||
int i;
|
||||
|
||||
@ -546,7 +558,7 @@ mca_btl_portals_finalize(struct mca_btl_base_module_t *btl_base)
|
||||
OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_eager);
|
||||
OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_max);
|
||||
OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_user);
|
||||
|
||||
|
||||
ompi_common_portals_ni_finalize();
|
||||
ompi_common_portals_finalize();
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -331,27 +332,33 @@ mca_btl_portals_component_progress(void)
|
||||
frag = ev.md.user_ptr;
|
||||
btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
num_progressed++;
|
||||
|
||||
|
||||
switch (ev.type) {
|
||||
case PTL_EVENT_GET_START:
|
||||
/* generated on source (target) when a get from memory starts */
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_GET_START for 0x%lx, %d",
|
||||
(unsigned long) frag, (int) ev.hdr_data));
|
||||
|
||||
|
||||
break;
|
||||
|
||||
|
||||
case PTL_EVENT_GET_END:
|
||||
/* generated on source (target) when a get from memory ends */
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_GET_END for 0x%lx, %d",
|
||||
(unsigned long) frag, (int) ev.hdr_data));
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_GET_END for 0x%lx, %d, flags %d",
|
||||
(unsigned long) frag, (int) ev.hdr_data,
|
||||
frag->base.des_flags));
|
||||
|
||||
if( btl_ownership ) {
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"in PTL_EVENT_GET_END received a frag with btl_ownership!"));
|
||||
mca_btl_portals_free(&mca_btl_portals_module.super,
|
||||
&frag->base);
|
||||
}
|
||||
break;
|
||||
|
||||
case PTL_EVENT_PUT_START:
|
||||
/* generated on destination (target) when a put into memory starts */
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_PUT_START for 0x%lx, %d",
|
||||
(unsigned long) frag, (int) ev.hdr_data));
|
||||
|
||||
@ -372,7 +379,7 @@ mca_btl_portals_component_progress(void)
|
||||
|
||||
case PTL_EVENT_PUT_END:
|
||||
/* generated on destination (target) when a put into memory ends */
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_PUT_END for 0x%lx, %d",
|
||||
(unsigned long) frag, (int) ev.hdr_data));
|
||||
|
||||
@ -421,31 +428,33 @@ mca_btl_portals_component_progress(void)
|
||||
mca_btl_portals_return_block_part(&mca_btl_portals_module, block);
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case PTL_EVENT_REPLY_START:
|
||||
/* generated on destination (origin) when a get starts
|
||||
returning data */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_REPLY_START for 0x%lx, %d",
|
||||
(unsigned long) frag, (int) ev.hdr_data));
|
||||
|
||||
|
||||
break;
|
||||
|
||||
|
||||
case PTL_EVENT_REPLY_END:
|
||||
/* generated on destination (origin) when a get is
|
||||
done returning data */
|
||||
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_REPLY_END for 0x%lx",
|
||||
(unsigned long) frag));
|
||||
|
||||
|
||||
/* let the PML know we're done */
|
||||
frag->base.des_cbfunc(&mca_btl_portals_module.super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_SUCCESS);
|
||||
if( btl_ownership ) {
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"in PTL_EVENT_REPLY_END received a frag with btl_ownership!"));
|
||||
mca_btl_portals_free(&mca_btl_portals_module.super,
|
||||
&frag->base);
|
||||
}
|
||||
@ -456,7 +465,7 @@ mca_btl_portals_component_progress(void)
|
||||
/* generated on source (origin) when put starts sending */
|
||||
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_SEND_START for 0x%lx, %d",
|
||||
(unsigned long) frag, (int) ev.hdr_data));
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,6 +26,7 @@
|
||||
static void
|
||||
mca_btl_portals_frag_common_send_constructor(mca_btl_portals_frag_t* frag)
|
||||
{
|
||||
frag->base.des_flags = 0;
|
||||
frag->base.des_dst = 0;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_src = frag->segments;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user