1
1

Fix a problem in releasing fragments during the GET_END event: the code did not
check whether the portals BTL has ownership of the fragment, and therefore did
not free the frag as it should. This caused leaks and hangs in MPI_Finalize.

Also added a bit more debugging. 

This commit was SVN r17900.
Этот коммит содержится в:
Galen Shipman 2008-03-20 22:46:32 +00:00
родитель c2fd5dd416
Коммит dcac824f59
3 изменённых файлов: 49 добавлений и 26 удалений

Просмотреть файл

@ -1,4 +1,4 @@
/*
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -362,10 +363,10 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
/* either a put or get. figure out which later */
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"rdma src posted for frag 0x%lx, callback 0x%lx, bits %" PRIu64,
"rdma src posted for frag 0x%lx, callback 0x%lx, bits %"PRIu64", flags say %d" ,
(unsigned long) frag,
(unsigned long) frag->base.des_cbfunc,
frag->segments[0].seg_key.key64));
frag->segments[0].seg_key.key64, flags));
/* create a match entry */
ret = PtlMEAttach(mca_btl_portals_module.portals_ni_h,
@ -457,10 +458,11 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl_base,
frag->base.des_flags = flags;
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"rdma dest posted for frag 0x%lx, callback 0x%lx, bits %" PRIu64,
"rdma dest posted for frag 0x%lx, callback 0x%lx, bits %" PRIu64 " flags %d",
(unsigned long) frag,
(unsigned long) frag->base.des_cbfunc,
frag->segments[0].seg_key.key64));
frag->segments[0].seg_key.key64,
flags));
/* create a match entry */
ret = PtlMEAttach(mca_btl_portals_module.portals_ni_h,
@ -511,12 +513,22 @@ mca_btl_portals_finalize(struct mca_btl_base_module_t *btl_base)
int ret;
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"in mca_btl_portals_finalize"));
/* sanity check */
assert(mca_btl_portals_module.portals_outstanding_ops >= 0);
/* finalize all communication */
while (mca_btl_portals_module.portals_outstanding_ops > 0) {
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"portals_outstanding_ops: %d",
mca_btl_portals_module.portals_outstanding_ops));
mca_btl_portals_component_progress();
}
if (mca_btl_portals_module.portals_num_procs != 0) {
int i;
@ -546,7 +558,7 @@ mca_btl_portals_finalize(struct mca_btl_base_module_t *btl_base)
OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_eager);
OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_max);
OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_user);
ompi_common_portals_ni_finalize();
ompi_common_portals_finalize();

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -331,27 +332,33 @@ mca_btl_portals_component_progress(void)
frag = ev.md.user_ptr;
btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
num_progressed++;
switch (ev.type) {
case PTL_EVENT_GET_START:
/* generated on source (target) when a get from memory starts */
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"PTL_EVENT_GET_START for 0x%lx, %d",
(unsigned long) frag, (int) ev.hdr_data));
break;
case PTL_EVENT_GET_END:
/* generated on source (target) when a get from memory ends */
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
"PTL_EVENT_GET_END for 0x%lx, %d",
(unsigned long) frag, (int) ev.hdr_data));
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"PTL_EVENT_GET_END for 0x%lx, %d, flags %d",
(unsigned long) frag, (int) ev.hdr_data,
frag->base.des_flags));
if( btl_ownership ) {
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"in PTL_EVENT_GET_END received a frag with btl_ownership!"));
mca_btl_portals_free(&mca_btl_portals_module.super,
&frag->base);
}
break;
case PTL_EVENT_PUT_START:
/* generated on destination (target) when a put into memory starts */
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"PTL_EVENT_PUT_START for 0x%lx, %d",
(unsigned long) frag, (int) ev.hdr_data));
@ -372,7 +379,7 @@ mca_btl_portals_component_progress(void)
case PTL_EVENT_PUT_END:
/* generated on destination (target) when a put into memory ends */
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"PTL_EVENT_PUT_END for 0x%lx, %d",
(unsigned long) frag, (int) ev.hdr_data));
@ -421,31 +428,33 @@ mca_btl_portals_component_progress(void)
mca_btl_portals_return_block_part(&mca_btl_portals_module, block);
}
break;
case PTL_EVENT_REPLY_START:
/* generated on destination (origin) when a get starts
returning data */
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"PTL_EVENT_REPLY_START for 0x%lx, %d",
(unsigned long) frag, (int) ev.hdr_data));
break;
case PTL_EVENT_REPLY_END:
/* generated on destination (origin) when a get is
done returning data */
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"PTL_EVENT_REPLY_END for 0x%lx",
(unsigned long) frag));
/* let the PML know we're done */
frag->base.des_cbfunc(&mca_btl_portals_module.super,
frag->endpoint,
&frag->base,
OMPI_SUCCESS);
if( btl_ownership ) {
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"in PTL_EVENT_REPLY_END received a frag with btl_ownership!"));
mca_btl_portals_free(&mca_btl_portals_module.super,
&frag->base);
}
@ -456,7 +465,7 @@ mca_btl_portals_component_progress(void)
/* generated on source (origin) when put starts sending */
#if OMPI_ENABLE_DEBUG
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
"PTL_EVENT_SEND_START for 0x%lx, %d",
(unsigned long) frag, (int) ev.hdr_data));

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -25,6 +26,7 @@
static void
mca_btl_portals_frag_common_send_constructor(mca_btl_portals_frag_t* frag)
{
frag->base.des_flags = 0;
frag->base.des_dst = 0;
frag->base.des_dst_cnt = 0;
frag->base.des_src = frag->segments;