From 295eec7059d3ce856af4e37b276ae81b0fed48f1 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 15 Sep 2016 23:02:59 -0400 Subject: [PATCH] Small fix for persistence receives. A minor optimization, few typos and extra comments --- ompi/mca/pml/ob1/pml_ob1_irecv.c | 6 ++++ .../topo_treematch_dist_graph_create.c | 5 ++- opal/datatype/opal_datatype_pack.c | 21 ++++++------ opal/datatype/opal_datatype_position.c | 32 +++++++++++++------ opal/threads/wait_sync.h | 6 ++-- 5 files changed, 42 insertions(+), 28 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_irecv.c b/ompi/mca/pml/ob1/pml_ob1_irecv.c index ddf6c54db8..65e097f8a9 100644 --- a/ompi/mca/pml/ob1/pml_ob1_irecv.c +++ b/ompi/mca/pml/ob1/pml_ob1_irecv.c @@ -59,6 +59,12 @@ int mca_pml_ob1_irecv_init(void *addr, &((recvreq)->req_recv.req_base), PERUSE_RECV); + /* Work around a leak in start by marking this request as complete. The + * problem occured because we do not have a way to differentiate an + * inital request and an incomplete pml request in start. This line + * allows us to detect this state. */ + recvreq->req_recv.req_base.req_pml_complete = true; + *request = (ompi_request_t *) recvreq; return OMPI_SUCCESS; } diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index 8cf8eb7b86..62460e5c1f 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -224,9 +224,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, num_nodes++; /* update the number of nodes */ for(j = i+1; j < size; j++) - if( vpids[j] != -1 ) - if( vpids[i] == vpids[j] ) - vpids[j] = -1; + if( vpids[i] == vpids[j] ) + vpids[j] = -1; } if( 0 == num_nodes ) { /* No useful info has been retrieved from the runtime. Fallback diff --git a/opal/datatype/opal_datatype_pack.c b/opal/datatype/opal_datatype_pack.c index faa18266e2..08ae1ecf7a 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -53,7 +53,6 @@ #define IOVEC_MEM_LIMIT 8192 - /* the contig versions does not use the stack. They can easily retrieve * the status with just the informations from pConvertor->bConverted. */ @@ -103,6 +102,7 @@ opal_pack_homogeneous_contig_function( opal_convertor_t* pConv, return 0; } + int32_t opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv, struct iovec* iov, @@ -331,22 +331,20 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, pos_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( 0 == pConvertor->stack_pos ) { - /* we lie about the size of the next element in order to - * make sure we exit the main loop. - */ + /* we're done. Force the exit of the main for loop (around iovec) */ *out_size = iov_count; - goto complete_loop; /* completed */ + goto complete_loop; } - pConvertor->stack_pos--; + pConvertor->stack_pos--; /* go one position up on the stack */ pStack--; - pos_desc++; + pos_desc++; /* and move to the next element */ } else { - pos_desc = pStack->index + 1; - if( pStack->index == -1 ) { - pStack->disp += (pData->ub - pData->lb); + pos_desc = pStack->index + 1; /* jump back to the begining of the loop */ + if( pStack->index == -1 ) { /* If it's the datatype count loop */ + pStack->disp += (pData->ub - pData->lb); /* jump by the datatype extent */ } else { assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type ); - pStack->disp += description[pStack->index].loop.extent; + pStack->disp += description[pStack->index].loop.extent; /* jump by the loop extent */ } } conv_ptr = pConvertor->pBaseBuf + pStack->disp; @@ -374,7 +372,6 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, conv_ptr = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); - continue; } } complete_loop: diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index f5e51b86f9..c710a4ae3e 100644 --- a/opal/datatype/opal_datatype_position.c +++ b/opal/datatype/opal_datatype_position.c @@ -49,11 +49,17 @@ * - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless. */ -static inline void position_predefined_data( opal_convertor_t* CONVERTOR, - dt_elem_desc_t* ELEM, - uint32_t* COUNT, - unsigned char** POINTER, - size_t* SPACE ) +/** + * Advance the current position in the convertor based using the + * current element and a left-over counter. Update the head pointer + * and the leftover byte space. + */ +static inline void +position_predefined_data( opal_convertor_t* CONVERTOR, + dt_elem_desc_t* ELEM, + uint32_t* COUNT, + unsigned char** POINTER, + size_t* SPACE ) { uint32_t _copy_count = *(COUNT); size_t _copy_blength; @@ -73,11 +79,17 @@ static inline void position_predefined_data( opal_convertor_t* CONVERTOR, *(COUNT) -= _copy_count; } -static inline void position_contiguous_loop( opal_convertor_t* CONVERTOR, - dt_elem_desc_t* ELEM, - uint32_t* COUNT, - unsigned char** POINTER, - size_t* SPACE ) +/** + * Advance the current position in the convertor based using the + * current contiguous loop and a left-over counter. Update the head + * pointer and the leftover byte space. + */ +static inline void +position_contiguous_loop( opal_convertor_t* CONVERTOR, + dt_elem_desc_t* ELEM, + uint32_t* COUNT, + unsigned char** POINTER, + size_t* SPACE ) { ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM); ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + (ELEM)->loop.items); diff --git a/opal/threads/wait_sync.h b/opal/threads/wait_sync.h index 2ec8485b21..8d83effc9c 100644 --- a/opal/threads/wait_sync.h +++ b/opal/threads/wait_sync.h @@ -42,8 +42,8 @@ typedef struct ompi_wait_sync_t { /* The loop in release handles a race condition between the signaling * thread and the destruction of the condition variable. The signaling * member will be set to false after the final signaling thread has - * finished opertating on the sync object. This is done to avoid - * extra atomics in the singalling function and keep it as fast + * finished operating on the sync object. This is done to avoid + * extra atomics in the signalling function and keep it as fast * as possible. Note that the race window is small so spinning here * is more optimal than sleeping since this macro is called in * the critical path. */ @@ -73,7 +73,7 @@ typedef struct ompi_wait_sync_t { #define WAIT_SYNC_SIGNALLED(sync){ \ (sync)->signaling = false; \ -} +} OPAL_DECLSPEC int sync_wait_mt(ompi_wait_sync_t *sync); static inline int sync_wait_st (ompi_wait_sync_t *sync)