Fix DR PML after the great MTL crusade. Added a bit of debugging while I was
in there trying to track things down. This commit was SVN r11208.
Этот коммит содержится в:
родитель
1cf4d0bd18
Коммит
84e7b90a19
@ -162,10 +162,11 @@ int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs)
|
||||
|
||||
|
||||
endpoint = OBJ_NEW(mca_pml_dr_endpoint_t);
|
||||
endpoint->src = mca_pml_dr.my_rank;
|
||||
endpoint->proc_ompi = procs[i];
|
||||
procs[i]->proc_pml = (struct mca_pml_base_endpoint_t*) endpoint;
|
||||
|
||||
MCA_PML_DR_DEBUG(10, (0, "%s:%d: adding endpoint 0x%08x to proc_pml 0x%08x\n",
|
||||
__FILE__, __LINE__, endpoint, procs[i]));
|
||||
|
||||
/* this won't work for comm spawn and other dynamic
|
||||
processes, but will work for initial job start */
|
||||
idx = ompi_pointer_array_add(&mca_pml_dr.endpoints,
|
||||
@ -176,9 +177,17 @@ int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs)
|
||||
mca_pml_dr.my_rank = idx;
|
||||
}
|
||||
endpoint->local = endpoint->dst = idx;
|
||||
MCA_PML_DR_DEBUG(10, (0, "%s:%d: setting endpoint->dst to %d\n",
|
||||
__FILE__, __LINE__, idx));
|
||||
|
||||
endpoint->bml_endpoint = bml_endpoints[i];
|
||||
}
|
||||
|
||||
|
||||
for(i = 0; i < nprocs; i++) {
|
||||
mca_pml_dr_endpoint_t* ep = (mca_pml_dr_endpoint_t*)
|
||||
ompi_pointer_array_get_item(&mca_pml_dr.endpoints, i);
|
||||
ep->src = mca_pml_dr.my_rank;
|
||||
}
|
||||
/* no longer need this */
|
||||
if ( NULL != bml_endpoints ) {
|
||||
free ( bml_endpoints) ;
|
||||
|
@ -242,7 +242,7 @@ MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, des, \
|
||||
#endif
|
||||
|
||||
|
||||
#define MCA_PML_DR_DEBUG_LEVEL 0
|
||||
#define MCA_PML_DR_DEBUG_LEVEL -1
|
||||
#define MCA_PML_DR_DEBUG(level,msg) \
|
||||
if(level <= MCA_PML_DR_DEBUG_LEVEL){ \
|
||||
OPAL_OUTPUT(msg); \
|
||||
|
@ -113,9 +113,15 @@ void mca_pml_dr_recv_frag_callback(
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
if(segments->seg_len < sizeof(mca_pml_dr_common_hdr_t)) {
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: wtf? %d\n",
|
||||
__FILE__, __LINE__));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: got a hdr of type %d\n",
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_type));
|
||||
|
||||
switch(hdr->hdr_common.hdr_type) {
|
||||
case MCA_PML_DR_HDR_TYPE_MATCH:
|
||||
{
|
||||
@ -951,8 +957,10 @@ rematch:
|
||||
* descriptor */
|
||||
frag->request=match;
|
||||
match->req_proc = proc;
|
||||
match->req_endpoint = (mca_pml_dr_endpoint_t*)proc->ompi_proc->proc_bml;
|
||||
|
||||
match->req_endpoint = (mca_pml_dr_endpoint_t*)proc->ompi_proc->proc_pml;
|
||||
MCA_PML_DR_DEBUG(10, (0, "%s:%d: adding endpoint 0x%08x match 0x%08x\n",
|
||||
__FILE__, __LINE__, proc->ompi_proc->proc_pml, match->req_endpoint));
|
||||
|
||||
/* add this fragment descriptor to the list of
|
||||
* descriptors to be processed later
|
||||
*/
|
||||
|
@ -104,6 +104,7 @@ do {
|
||||
sendreq->req_send.req_base.req_proc->proc_bml; \
|
||||
bool do_csum = mca_pml_dr.enable_csum && \
|
||||
(endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \
|
||||
assert(do_csum); \
|
||||
/* increment reference counts */ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
|
@ -70,7 +70,8 @@ static void mca_pml_dr_vfrag_wdog_timeout(int fd, short event, void* data)
|
||||
mca_pml_dr_vfrag_t* vfrag = (mca_pml_dr_vfrag_t*) data;
|
||||
mca_pml_dr_send_request_t* sendreq = vfrag->vf_send.pval;
|
||||
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d:%s: wdog timeout: 0x%08x", __FILE__, __LINE__, __func__, vfrag));
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d:%s: wdog timeout: 0x%08x vid: %d",
|
||||
__FILE__, __LINE__, __func__, vfrag, vfrag->vf_id));
|
||||
|
||||
/* update pending counts */
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,-vfrag->vf_pending);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user