Small fix for persistence receives.
A minor optimization, few typos and extra comments
Этот коммит содержится в:
родитель
e55cc63da9
Коммит
295eec7059
@ -59,6 +59,12 @@ int mca_pml_ob1_irecv_init(void *addr,
|
|||||||
&((recvreq)->req_recv.req_base),
|
&((recvreq)->req_recv.req_base),
|
||||||
PERUSE_RECV);
|
PERUSE_RECV);
|
||||||
|
|
||||||
|
/* Work around a leak in start by marking this request as complete. The
|
||||||
|
* problem occured because we do not have a way to differentiate an
|
||||||
|
* inital request and an incomplete pml request in start. This line
|
||||||
|
* allows us to detect this state. */
|
||||||
|
recvreq->req_recv.req_base.req_pml_complete = true;
|
||||||
|
|
||||||
*request = (ompi_request_t *) recvreq;
|
*request = (ompi_request_t *) recvreq;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -224,9 +224,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
|||||||
num_nodes++; /* update the number of nodes */
|
num_nodes++; /* update the number of nodes */
|
||||||
|
|
||||||
for(j = i+1; j < size; j++)
|
for(j = i+1; j < size; j++)
|
||||||
if( vpids[j] != -1 )
|
if( vpids[i] == vpids[j] )
|
||||||
if( vpids[i] == vpids[j] )
|
vpids[j] = -1;
|
||||||
vpids[j] = -1;
|
|
||||||
}
|
}
|
||||||
if( 0 == num_nodes ) {
|
if( 0 == num_nodes ) {
|
||||||
/* No useful info has been retrieved from the runtime. Fallback
|
/* No useful info has been retrieved from the runtime. Fallback
|
||||||
|
@ -53,7 +53,6 @@
|
|||||||
|
|
||||||
#define IOVEC_MEM_LIMIT 8192
|
#define IOVEC_MEM_LIMIT 8192
|
||||||
|
|
||||||
|
|
||||||
/* the contig versions does not use the stack. They can easily retrieve
|
/* the contig versions does not use the stack. They can easily retrieve
|
||||||
* the status with just the informations from pConvertor->bConverted.
|
* the status with just the informations from pConvertor->bConverted.
|
||||||
*/
|
*/
|
||||||
@ -103,6 +102,7 @@ opal_pack_homogeneous_contig_function( opal_convertor_t* pConv,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int32_t
|
int32_t
|
||||||
opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
|
opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
|
||||||
struct iovec* iov,
|
struct iovec* iov,
|
||||||
@ -331,22 +331,20 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor,
|
|||||||
pos_desc, (long)pStack->disp, (unsigned long)iov_len_local ); );
|
pos_desc, (long)pStack->disp, (unsigned long)iov_len_local ); );
|
||||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||||
if( 0 == pConvertor->stack_pos ) {
|
if( 0 == pConvertor->stack_pos ) {
|
||||||
/* we lie about the size of the next element in order to
|
/* we're done. Force the exit of the main for loop (around iovec) */
|
||||||
* make sure we exit the main loop.
|
|
||||||
*/
|
|
||||||
*out_size = iov_count;
|
*out_size = iov_count;
|
||||||
goto complete_loop; /* completed */
|
goto complete_loop;
|
||||||
}
|
}
|
||||||
pConvertor->stack_pos--;
|
pConvertor->stack_pos--; /* go one position up on the stack */
|
||||||
pStack--;
|
pStack--;
|
||||||
pos_desc++;
|
pos_desc++; /* and move to the next element */
|
||||||
} else {
|
} else {
|
||||||
pos_desc = pStack->index + 1;
|
pos_desc = pStack->index + 1; /* jump back to the begining of the loop */
|
||||||
if( pStack->index == -1 ) {
|
if( pStack->index == -1 ) { /* If it's the datatype count loop */
|
||||||
pStack->disp += (pData->ub - pData->lb);
|
pStack->disp += (pData->ub - pData->lb); /* jump by the datatype extent */
|
||||||
} else {
|
} else {
|
||||||
assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type );
|
assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type );
|
||||||
pStack->disp += description[pStack->index].loop.extent;
|
pStack->disp += description[pStack->index].loop.extent; /* jump by the loop extent */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
|
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
|
||||||
@ -374,7 +372,6 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor,
|
|||||||
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
|
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
|
||||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||||
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
|
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
complete_loop:
|
complete_loop:
|
||||||
|
@ -49,11 +49,17 @@
|
|||||||
* - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless.
|
* - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static inline void position_predefined_data( opal_convertor_t* CONVERTOR,
|
/**
|
||||||
dt_elem_desc_t* ELEM,
|
* Advance the current position in the convertor based using the
|
||||||
uint32_t* COUNT,
|
* current element and a left-over counter. Update the head pointer
|
||||||
unsigned char** POINTER,
|
* and the leftover byte space.
|
||||||
size_t* SPACE )
|
*/
|
||||||
|
static inline void
|
||||||
|
position_predefined_data( opal_convertor_t* CONVERTOR,
|
||||||
|
dt_elem_desc_t* ELEM,
|
||||||
|
uint32_t* COUNT,
|
||||||
|
unsigned char** POINTER,
|
||||||
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
uint32_t _copy_count = *(COUNT);
|
uint32_t _copy_count = *(COUNT);
|
||||||
size_t _copy_blength;
|
size_t _copy_blength;
|
||||||
@ -73,11 +79,17 @@ static inline void position_predefined_data( opal_convertor_t* CONVERTOR,
|
|||||||
*(COUNT) -= _copy_count;
|
*(COUNT) -= _copy_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void position_contiguous_loop( opal_convertor_t* CONVERTOR,
|
/**
|
||||||
dt_elem_desc_t* ELEM,
|
* Advance the current position in the convertor based using the
|
||||||
uint32_t* COUNT,
|
* current contiguous loop and a left-over counter. Update the head
|
||||||
unsigned char** POINTER,
|
* pointer and the leftover byte space.
|
||||||
size_t* SPACE )
|
*/
|
||||||
|
static inline void
|
||||||
|
position_contiguous_loop( opal_convertor_t* CONVERTOR,
|
||||||
|
dt_elem_desc_t* ELEM,
|
||||||
|
uint32_t* COUNT,
|
||||||
|
unsigned char** POINTER,
|
||||||
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||||
ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + (ELEM)->loop.items);
|
ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + (ELEM)->loop.items);
|
||||||
|
@ -42,8 +42,8 @@ typedef struct ompi_wait_sync_t {
|
|||||||
/* The loop in release handles a race condition between the signaling
|
/* The loop in release handles a race condition between the signaling
|
||||||
* thread and the destruction of the condition variable. The signaling
|
* thread and the destruction of the condition variable. The signaling
|
||||||
* member will be set to false after the final signaling thread has
|
* member will be set to false after the final signaling thread has
|
||||||
* finished opertating on the sync object. This is done to avoid
|
* finished operating on the sync object. This is done to avoid
|
||||||
* extra atomics in the singalling function and keep it as fast
|
* extra atomics in the signalling function and keep it as fast
|
||||||
* as possible. Note that the race window is small so spinning here
|
* as possible. Note that the race window is small so spinning here
|
||||||
* is more optimal than sleeping since this macro is called in
|
* is more optimal than sleeping since this macro is called in
|
||||||
* the critical path. */
|
* the critical path. */
|
||||||
@ -73,7 +73,7 @@ typedef struct ompi_wait_sync_t {
|
|||||||
|
|
||||||
#define WAIT_SYNC_SIGNALLED(sync){ \
|
#define WAIT_SYNC_SIGNALLED(sync){ \
|
||||||
(sync)->signaling = false; \
|
(sync)->signaling = false; \
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_DECLSPEC int sync_wait_mt(ompi_wait_sync_t *sync);
|
OPAL_DECLSPEC int sync_wait_mt(ompi_wait_sync_t *sync);
|
||||||
static inline int sync_wait_st (ompi_wait_sync_t *sync)
|
static inline int sync_wait_st (ompi_wait_sync_t *sync)
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user