1
1

Remove one layer of functions.

Look up the peer information only when we need it for later usage.
Small optimizations (moving some functions into the .h file and transforming them into static inline).
Cleanups, cleanups and finally cleanups ...

This commit was SVN r3870.
Этот коммит содержится в:
George Bosilca 2004-12-22 07:20:05 +00:00
родитель f0d86178bc
Коммит e6fa352437
7 изменённых файлов: 93 добавлений и 98 удалений

Просмотреть файл

@ -46,7 +46,7 @@ mca_ptl_gm_module_t mca_ptl_gm_module = {
mca_ptl_gm_add_procs,
mca_ptl_gm_del_procs,
mca_ptl_gm_finalize,
mca_ptl_gm_send,
mca_ptl_gm_peer_send,
mca_ptl_gm_put,
mca_ptl_gm_get,
mca_ptl_gm_matched,
@ -260,6 +260,7 @@ mca_ptl_gm_request_fini (struct mca_ptl_base_module_t *ptl,
OBJ_DESTRUCT(request+1);
}
#if 0
int
mca_ptl_gm_send (struct mca_ptl_base_module_t *ptl,
struct mca_ptl_base_peer_t *ptl_peer,
@ -289,6 +290,7 @@ mca_ptl_gm_send (struct mca_ptl_base_module_t *ptl,
return OMPI_SUCCESS;
}
#endif /* 0 */
/*
* Initiate a put

Просмотреть файл

@ -133,14 +133,6 @@ extern "C" {
*/
extern int mca_ptl_gm_component_progress (mca_ptl_tstamp_t tstamp);
/**
* GM send
*/
extern int mca_ptl_gm_send( struct mca_ptl_base_module_t *ptl,
struct mca_ptl_base_peer_t *ptl_peer,
struct mca_pml_base_send_request_t *sendreq,
size_t offset, size_t size, int flags);
/**
* GM put
*/

Просмотреть файл

@ -392,14 +392,17 @@ mca_ptl_gm_init( mca_ptl_gm_component_t * gm )
uint32_t save_counter;
/* let's try to find if GM is available */
if( GM_SUCCESS != gm_init() )
if( GM_SUCCESS != gm_init() ) {
ompi_output( 0, "[%s:%d] error in initializing the gm library\n", __FILE__, __LINE__ );
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* First discover all available boards. For each board we will create a unique PTL */
mca_ptl_gm_component.gm_ptl_modules = calloc( mca_ptl_gm_component.gm_max_ptl_modules,
sizeof (mca_ptl_gm_module_t *));
if (NULL == mca_ptl_gm_component.gm_ptl_modules)
if (NULL == mca_ptl_gm_component.gm_ptl_modules) {
ompi_output( 0, "[%s:%d] error in initializing the gm PTL's.\n", __FILE__, __LINE__ );
return OMPI_ERR_OUT_OF_RESOURCE;
}
mca_ptl_gm_component.gm_num_ptl_modules =
mca_ptl_gm_discover_boards( mca_ptl_gm_component.gm_ptl_modules,
@ -461,8 +464,8 @@ mca_ptl_gm_component_init (int *num_ptl_modules,
#endif /* OMPI_HAVE_POSIX_THREADS */
if (OMPI_SUCCESS != mca_ptl_gm_init (&mca_ptl_gm_component)) {
ompi_output( 0, "[%s:%d] error in initializing gm state and PTL's.\n",
__FILE__, __LINE__ );
ompi_output( 0, "[%s:%d] error in initializing gm state and PTL's. (%d PTL's)\n",
__FILE__, __LINE__, mca_ptl_gm_component.gm_num_ptl_modules );
return NULL;
}

Просмотреть файл

@ -190,12 +190,12 @@ int mca_ptl_gm_peer_send_continue( mca_ptl_gm_peer_t *ptl_peer,
 * Now, depending on the quantity of data that has to be transferred and on the flags,
 * we will add more information to the header.
 */
int mca_ptl_gm_peer_send( mca_ptl_gm_peer_t *ptl_peer,
mca_ptl_gm_send_frag_t *fragment,
int mca_ptl_gm_peer_send( struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_peer_t* ptl_base_peer,
struct mca_pml_base_send_request_t *sendreq,
size_t offset,
size_t *size,
int flags )
size_t size,
int flags )
{
struct iovec iov;
size_t size_in, size_out;
@ -204,17 +204,24 @@ int mca_ptl_gm_peer_send( mca_ptl_gm_peer_t *ptl_peer,
ompi_convertor_t *convertor = NULL;
int rc, freeAfter;
unsigned int in_size, max_data = 0;
mca_ptl_gm_send_frag_t *fragment;
mca_ptl_gm_peer_t* ptl_peer;
fragment = mca_ptl_gm_alloc_send_frag( (mca_ptl_gm_module_t*)ptl, sendreq );
if( NULL == fragment ) {
ompi_output( 0,"[%s:%d] Unable to allocate a gm send frag\n",
__FILE__, __LINE__ );
return OMPI_ERR_OUT_OF_RESOURCE;
}
hdr = (mca_ptl_base_header_t*)fragment->send_buf;
size_in = *size;
size_in = size;
fragment->send_frag.frag_base.frag_owner = &ptl_peer->peer_ptl->super;
fragment->send_frag.frag_base.frag_peer = (struct mca_ptl_base_peer_t*)ptl_peer;
fragment->send_frag.frag_request = sendreq;
fragment->frag_bytes_processed = 0;
fragment->send_frag.frag_base.frag_peer = ptl_base_peer;
/* At this point the header is already filled up with informations as a match header */
if( (flags & MCA_PTL_FLAGS_ACK) || (0 == offset) ) {
/* At this point the header is already filled up with informations as a match header */
(void)mca_ptl_gm_init_header_match( fragment, sendreq, flags );
if( flags & MCA_PTL_FLAGS_ACK ) {
header_length = sizeof(mca_ptl_base_rendezvous_header_t);
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_RNDV;
@ -227,6 +234,7 @@ int mca_ptl_gm_peer_send( mca_ptl_gm_peer_t *ptl_peer,
} else {
header_length = sizeof(mca_ptl_base_frag_header_t);
hdr->hdr_frag.hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
hdr->hdr_frag.hdr_common.hdr_flags = flags;
hdr->hdr_frag.hdr_frag_length = size_in;
hdr->hdr_frag.hdr_frag_offset = offset;
hdr->hdr_frag.hdr_src_ptr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
@ -288,7 +296,6 @@ int mca_ptl_gm_peer_send( mca_ptl_gm_peer_t *ptl_peer,
GM_SIZE, size_out, GM_LOW_PRIORITY, ptl_peer->local_id,
send_callback, (void *)fragment );
fragment->frag_bytes_processed = size_out - header_length;
*size = fragment->frag_bytes_processed;
if( !(flags & MCA_PTL_FLAGS_ACK) ) {
ptl_peer->peer_ptl->super.ptl_send_progress( (mca_ptl_base_module_t*)ptl_peer->peer_ptl,
fragment->send_frag.frag_request,
@ -453,9 +460,7 @@ mca_ptl_gm_recv_frag_match( struct mca_ptl_gm_module_t *ptl,
/* allocate a receive fragment */
recv_frag = mca_ptl_gm_alloc_recv_frag( (struct mca_ptl_base_module_t*)ptl );
/*recv_frag->frag_recv.frag_base.frag_peer = NULL;
recv_frag->frag_recv.frag_request = NULL;
*/
if( MCA_PTL_HDR_TYPE_MATCH == hdr->hdr_rndv.hdr_match.hdr_common.hdr_type ) {
recv_frag->frag_recv.frag_base.frag_addr =
(char *) hdr + sizeof(mca_ptl_base_match_header_t);
@ -528,24 +533,27 @@ static void mca_ptl_gm_get_callback( struct gm_port *port, void * context, gm_st
length = frag->frag_recv.frag_base.frag_size;
ack = mca_ptl_gm_alloc_send_frag( gm_ptl, NULL );
rc = mca_ptl_gm_send_ack_init( ack, gm_ptl,
(mca_ptl_gm_peer_t *)(frag->frag_recv.frag_base.frag_peer),
(mca_ptl_gm_peer_t*)(frag->frag_recv.frag_base.frag_peer),
frag, NULL,
frag->frag_recv.frag_base.frag_size );
hdr = (mca_ptl_base_header_t*)ack->send_buf;
hdr->hdr_common.hdr_flags |= PTL_FLAG_GM_HAS_FRAGMENT;
frag->frag_bytes_processed += frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_frag_length;
if( frag->frag_recv.frag_base.frag_size <= frag->frag_bytes_processed ) {
/* This request is done. I will send back the FIN message */
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FIN;
}
gm_send_to_peer_with_callback( ((mca_ptl_gm_module_t*)(ack->send_frag.frag_base.frag_owner))->gm_port,
ack->send_buf, GM_SIZE, sizeof(mca_ptl_base_ack_header_t),
GM_LOW_PRIORITY, peer->local_id, send_callback, (void*)ack );
if( frag->frag_recv.frag_base.frag_size <= frag->frag_bytes_processed ) {
gm_ptl->super.ptl_recv_progress( (mca_ptl_base_module_t*)gm_ptl,
request,
frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_frag_length,
frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_frag_length );
/* This request is done. I will send back the FIN message */
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FIN;
OMPI_FREE_LIST_RETURN( &(gm_ptl->gm_recv_frags_free), (ompi_list_item_t*)frag );
}
gm_send_to_peer_with_callback( ((mca_ptl_gm_module_t*)(ack->send_frag.frag_base.frag_owner))->gm_port,
ack->send_buf, GM_SIZE, sizeof(mca_ptl_base_ack_header_t),
GM_LOW_PRIORITY, peer->local_id, send_callback, (void*)ack );
status = gm_deregister_memory( ((mca_ptl_gm_module_t*)(ack->send_frag.frag_base.frag_owner))->gm_port,
pointer, length );
if( GM_SUCCESS != status ) {
@ -568,7 +576,7 @@ mca_ptl_gm_recv_frag_frag( struct mca_ptl_gm_module_t *ptl,
gm_recv_event_t* event )
{
mca_pml_base_recv_request_t *request;
ompi_convertor_t* convertor = NULL;
ompi_convertor_t local_convertor, *convertor;
mca_ptl_base_header_t *hdr;
struct iovec iov;
uint32_t iov_count, max_data;
@ -590,16 +598,14 @@ mca_ptl_gm_recv_frag_frag( struct mca_ptl_gm_module_t *ptl,
if( hdr->hdr_frag.hdr_frag_length <= (GM_BUF_SIZE - sizeof(mca_ptl_base_frag_header_t)) ) {
ompi_proc_t* proc = ompi_comm_peer_lookup( request->req_base.req_comm,
request->req_base.req_ompi.req_status.MPI_SOURCE );
convertor = ompi_convertor_get_copy( proc->proc_convertor );
convertor = &local_convertor;
ompi_convertor_copy( proc->proc_convertor, convertor );
recv_frag = NULL;
} else { /* large message => we have to create a receive fragment */
recv_frag = mca_ptl_gm_alloc_recv_frag( (struct mca_ptl_base_module_t*)ptl );
recv_frag->frag_recv.frag_request = request;
recv_frag->frag_recv.frag_base.frag_header.hdr_frag = hdr->hdr_frag;
recv_frag->frag_recv.frag_base.frag_peer =
mca_pml_teg_proc_lookup_remote_peer( request->req_base.req_comm,
request->req_base.req_ompi.req_status.MPI_SOURCE,
(struct mca_ptl_base_module_t*)ptl );
recv_frag->frag_recv.frag_base.frag_peer = NULL;
recv_frag->frag_offset = hdr->hdr_frag.hdr_frag_offset;
recv_frag->matched = true;
recv_frag->frag_bytes_processed = 0;
@ -621,16 +627,10 @@ mca_ptl_gm_recv_frag_frag( struct mca_ptl_gm_module_t *ptl,
freeAfter = 0; /* unused here */
rc = ompi_convertor_unpack( convertor, &iov, &iov_count, &max_data, &freeAfter );
assert( 0 == freeAfter );
ptl->super.ptl_recv_progress( (mca_ptl_base_module_t*)ptl, request, max_data, max_data );
if( PTL_FLAG_GM_LAST_FRAGMENT & hdr->hdr_common.hdr_flags ) {
/* I'm done with this fragment. Return it to the free list */
if( NULL != recv_frag ) {
OMPI_FREE_LIST_RETURN( &(ptl->gm_recv_frags_free), (ompi_list_item_t*)recv_frag );
}
/* All the data transferred. Update the receive request */
ptl->super.ptl_recv_progress( (mca_ptl_base_module_t*)ptl, request, max_data, max_data );
}
if( NULL == recv_frag ) {
OBJ_RELEASE( convertor );
}
} else {
gm_status_t status;
ompi_ptr_t* remote_memory = (ompi_ptr_t*)((char*)hdr + sizeof(mca_ptl_base_frag_header_t));
@ -644,7 +644,13 @@ mca_ptl_gm_recv_frag_frag( struct mca_ptl_gm_module_t *ptl,
(void*)request->req_base.req_addr, hdr->hdr_frag.hdr_frag_length );
return NULL;
}
peer = (mca_ptl_gm_peer_t*)recv_frag->frag_recv.frag_base.frag_peer;
if( NULL == recv_frag->frag_recv.frag_base.frag_peer ) {
recv_frag->frag_recv.frag_base.frag_peer = (struct mca_ptl_base_peer_t*)
mca_pml_teg_proc_lookup_remote_peer( request->req_base.req_comm,
request->req_base.req_ompi.req_status.MPI_SOURCE,
(struct mca_ptl_base_module_t*)ptl );
}
peer = (mca_ptl_gm_peer_t*)recv_frag->frag_recv.frag_base.frag_peer;
recv_frag->frag_recv.frag_base.frag_addr = (char*)request->req_base.req_addr + hdr->hdr_frag.hdr_frag_offset;
gm_get( ptl->gm_port, remote_memory->lval,
recv_frag->frag_recv.frag_base.frag_addr,
@ -724,7 +730,7 @@ mca_ptl_gm_recv_frag_t* ptl_gm_handle_recv( struct mca_ptl_gm_module_t *ptl, gm_
case MCA_PTL_HDR_TYPE_ACK:
case MCA_PTL_HDR_TYPE_NACK:
ptl_gm_ctrl_frag(ptl,header);
ptl_gm_ctrl_frag( ptl, header );
break;
default:
ompi_output( 0, "[%s:%d] unexpected frag type %d\n",

Просмотреть файл

@ -31,13 +31,12 @@ int mca_ptl_gm_analyze_recv_event( struct mca_ptl_gm_module_t* ptl, gm_recv_even
void mca_ptl_gm_outstanding_recv( struct mca_ptl_gm_module_t *ptl);
int
mca_ptl_gm_peer_send( struct mca_ptl_gm_peer_t *ptl_peer,
struct mca_ptl_gm_send_frag_t *fragment,
struct mca_pml_base_send_request_t *sendreq,
size_t offset,
size_t *size,
int flags );
int mca_ptl_gm_peer_send( struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_peer_t* ptl_base_peer,
struct mca_pml_base_send_request_t *sendreq,
size_t offset,
size_t size,
int flags );
int
mca_ptl_gm_peer_send_continue( struct mca_ptl_gm_peer_t *ptl_peer,

Просмотреть файл

@ -28,13 +28,6 @@ static void mca_ptl_gm_send_frag_destruct (mca_ptl_gm_send_frag_t * frag);
static void mca_ptl_gm_recv_frag_construct (mca_ptl_gm_recv_frag_t * frag);
static void mca_ptl_gm_recv_frag_destruct (mca_ptl_gm_recv_frag_t * frag);
ompi_class_t mca_ptl_gm_send_frag_t_class = {
"mca_ptl_gm_send_frag_t",
OBJ_CLASS (mca_ptl_base_send_frag_t),
(ompi_construct_t) mca_ptl_gm_send_frag_construct,
(ompi_destruct_t) mca_ptl_gm_send_frag_destruct
};
/*
* send fragment constructor/destructors.
*/
@ -49,6 +42,13 @@ mca_ptl_gm_send_frag_destruct (mca_ptl_gm_send_frag_t * frag)
{
}
ompi_class_t mca_ptl_gm_send_frag_t_class = {
"mca_ptl_gm_send_frag_t",
OBJ_CLASS (mca_ptl_base_send_frag_t),
(ompi_construct_t) mca_ptl_gm_send_frag_construct,
(ompi_destruct_t) mca_ptl_gm_send_frag_destruct
};
/* It's not yet clear for me what's the best solution here. Block until we
 * get a free request or allocate a new one. The first case allows us to never
* take care of the gm allocated DMA buffer as all send fragments already have
@ -63,29 +63,26 @@ mca_ptl_gm_send_frag_t *
mca_ptl_gm_alloc_send_frag( struct mca_ptl_gm_module_t *ptl,
struct mca_pml_base_send_request_t * sendreq )
{
ompi_free_list_t *flist;
ompi_list_item_t *item;
mca_ptl_gm_send_frag_t *sendfrag;
int32_t rc;
flist = &(ptl->gm_send_frags);
/* first get a gm_send_frag */
OMPI_FREE_LIST_WAIT( &(ptl->gm_send_frags), item, rc );
OMPI_FREE_LIST_GET( &(ptl->gm_send_frags), item, rc );
sendfrag = (mca_ptl_gm_send_frag_t *)item;
/* And then get some DMA memory to put the data */
OMPI_FREE_LIST_WAIT( &(ptl->gm_send_dma_frags), item, rc );
ompi_atomic_sub( &(ptl->num_send_tokens), 1 );
assert( ptl->num_send_tokens >= 0 );
sendfrag->send_buf = (void*)item;
sendfrag->req = (struct mca_pml_base_send_request_t *)sendreq;
sendfrag->status = -1;
sendfrag->type = -1;
sendfrag->wait_for_ack = 0;
sendfrag->put_sent = -1;
sendfrag->req = sendreq;
sendfrag->send_frag.frag_base.frag_owner = (struct mca_ptl_base_module_t*)ptl;
sendfrag->frag_bytes_processed = 0;
sendfrag->status = -1;
sendfrag->type = -1;
sendfrag->wait_for_ack = 0;
sendfrag->put_sent = -1;
return sendfrag;
}
@ -175,13 +172,6 @@ int mca_ptl_gm_put_frag_init( struct mca_ptl_gm_send_frag_t* putfrag,
return OMPI_SUCCESS;
}
ompi_class_t mca_ptl_gm_recv_frag_t_class = {
"mca_ptl_gm_recv_frag_t",
OBJ_CLASS (mca_ptl_base_recv_frag_t),
(ompi_construct_t) mca_ptl_gm_recv_frag_construct,
(ompi_construct_t) mca_ptl_gm_recv_frag_destruct
};
/*
* recv fragment constructor/destructors.
*/
@ -196,18 +186,10 @@ mca_ptl_gm_recv_frag_destruct (mca_ptl_gm_recv_frag_t *frag)
{
}
mca_ptl_gm_recv_frag_t *
mca_ptl_gm_alloc_recv_frag( struct mca_ptl_base_module_t *ptl )
{
int rc;
ompi_list_item_t* item;
mca_ptl_gm_recv_frag_t* frag;
OMPI_FREE_LIST_GET( &(((mca_ptl_gm_module_t *)ptl)->gm_recv_frags_free), item, rc );
frag = (mca_ptl_gm_recv_frag_t*)item;
frag->frag_recv.frag_base.frag_owner = (struct mca_ptl_base_module_t*)ptl;
return frag;
}
ompi_class_t mca_ptl_gm_recv_frag_t_class = {
"mca_ptl_gm_recv_frag_t",
OBJ_CLASS (mca_ptl_base_recv_frag_t),
(ompi_construct_t) mca_ptl_gm_recv_frag_construct,
(ompi_construct_t) mca_ptl_gm_recv_frag_destruct
};

Просмотреть файл

@ -169,8 +169,19 @@ extern "C" {
int mca_ptl_gm_send_frag_done( struct mca_ptl_gm_send_frag_t* frag,
struct mca_pml_base_send_request_t* req);
mca_ptl_gm_recv_frag_t *
mca_ptl_gm_alloc_recv_frag( struct mca_ptl_base_module_t *ptl );
static inline mca_ptl_gm_recv_frag_t*
mca_ptl_gm_alloc_recv_frag( struct mca_ptl_base_module_t *ptl )
{
int rc;
ompi_list_item_t* item;
mca_ptl_gm_recv_frag_t* frag;
OMPI_FREE_LIST_GET( &(((mca_ptl_gm_module_t *)ptl)->gm_recv_frags_free), item, rc );
frag = (mca_ptl_gm_recv_frag_t*)item;
frag->frag_recv.frag_base.frag_owner = (struct mca_ptl_base_module_t*)ptl;
return frag;
}
#if defined(c_plusplus) || defined(__cplusplus)
}