Check in some debugging code to unify all the frag lists together.
This commit was SVN r2192.
Этот коммит содержится в:
родитель
7f3f72f1c7
Коммит
d5a714b63d
@ -24,7 +24,7 @@
|
||||
mca_ptl_elan_module_t mca_ptl_elan_module = {
|
||||
{
|
||||
&mca_ptl_elan_component.super,
|
||||
4,
|
||||
2,
|
||||
sizeof(mca_ptl_elan_send_frag_t),
|
||||
0, /* ptl_exclusivity */
|
||||
0, /* ptl_latency */
|
||||
@ -167,7 +167,7 @@ mca_ptl_elan_req_init (struct mca_ptl_base_module_t *ptl,
|
||||
{
|
||||
mca_ptl_elan_send_frag_t *desc;
|
||||
|
||||
START_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
START_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
|
||||
desc = mca_ptl_elan_alloc_send_desc(ptl, request, MCA_PTL_ELAN_DESC_QDMA);
|
||||
if (NULL == desc) {
|
||||
@ -181,7 +181,7 @@ mca_ptl_elan_req_init (struct mca_ptl_base_module_t *ptl,
|
||||
}
|
||||
desc->desc->desc_status = MCA_PTL_ELAN_DESC_CACHED;
|
||||
|
||||
END_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
END_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -192,13 +192,14 @@ mca_ptl_elan_req_fini (struct mca_ptl_base_module_t *ptl,
|
||||
/* XXX: Lock to be added */
|
||||
ompi_ptl_elan_queue_ctrl_t *queue;
|
||||
mca_ptl_elan_send_frag_t *desc;
|
||||
|
||||
queue = ((struct mca_ptl_elan_module_t * )ptl)->queue;
|
||||
START_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
|
||||
/* return the fragment and update the status */
|
||||
queue = ((struct mca_ptl_elan_module_t * )ptl)->queue;
|
||||
desc = ((mca_ptl_elan_send_request_t *) request)->req_frag;
|
||||
OMPI_FREE_LIST_RETURN (&queue->tx_desc_free, (ompi_list_item_t *) desc);
|
||||
desc->desc->desc_status = MCA_PTL_ELAN_DESC_LOCAL;
|
||||
END_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -242,7 +243,7 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl,
|
||||
* correspondingly multiple LOCKS to go through
|
||||
*/
|
||||
|
||||
START_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
START_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
|
||||
if (offset == 0) { /* The first fragment uses a cached desc */
|
||||
desc = ((mca_ptl_elan_send_request_t*)sendreq)->req_frag;
|
||||
@ -265,7 +266,7 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl,
|
||||
/* Update offset */
|
||||
sendreq->req_offset += size;
|
||||
|
||||
END_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
END_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -56,9 +56,9 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
||||
elan_size = ALIGNUP (sizeof (E4_Event), elan_align);
|
||||
|
||||
OBJ_CONSTRUCT(&flist->fl_lock, ompi_mutex_t);
|
||||
flist->fl_elem_size = flist->fl_max_to_alloc = 128;
|
||||
flist->fl_elem_size = flist->fl_max_to_alloc = OMPI_PTL_ELAN_MAX_QDESCS;
|
||||
flist->fl_num_allocated = 0;
|
||||
flist->fl_num_per_alloc = count = 16;
|
||||
flist->fl_num_per_alloc = count = OMPI_PTL_ELAN_NUM_QDESCS;
|
||||
flist->fl_elem_class = NULL; /* leave it null */
|
||||
flist->fl_mpool = NULL; /* leave it null */
|
||||
|
||||
@ -86,14 +86,12 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
||||
frag->desc = (ompi_ptl_elan_base_desc_t *)desc;
|
||||
|
||||
/* Initialize some of the dma structures */
|
||||
{
|
||||
desc->main_dma.dma_dstAddr = 0;
|
||||
desc->main_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event);
|
||||
desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input);
|
||||
INITEVENT_WORD (ctx, desc->elan_event, &desc->main_doneWord);
|
||||
RESETEVENT_WORD (&desc->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx, desc->elan_event, 1);
|
||||
}
|
||||
desc->main_dma.dma_dstAddr = 0;
|
||||
desc->main_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event);
|
||||
desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input);
|
||||
INITEVENT_WORD (ctx, desc->elan_event, &desc->main_doneWord);
|
||||
RESETEVENT_WORD (&desc->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx, desc->elan_event, 1);
|
||||
|
||||
item = (ompi_list_item_t *) frag;
|
||||
ompi_list_append (&flist->super, item);
|
||||
@ -310,14 +308,13 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
|
||||
* dma and IRQ etc but more open to update.
|
||||
*
|
||||
* Initialize a new event list managing this queue */
|
||||
|
||||
ompi_init_elan_queue_events (ptl, queue);
|
||||
|
||||
/* Allocate a cookie pool */
|
||||
queue->tx_cpool = elan4_allocCookiePool (ctx, ptl->elan_vp);
|
||||
|
||||
/* Init the Receive Queue structure */
|
||||
queue->rx_nslots = 128;
|
||||
queue->rx_nslots = OMPI_PTL_ELAN_MAX_QSLOTS;
|
||||
nslots += ELAN_QUEUE_LOST_SLOTS;
|
||||
|
||||
queue->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ?
|
||||
@ -442,11 +439,6 @@ ompi_init_elan_putget (mca_ptl_elan_component_t * emp,
|
||||
CQ_SetEventEnableBit, cqp);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (putget->get_cmdq, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/* XXX: With elan4_disp_cmdq_params(),
|
||||
* put_cmdq->cmd_flush == elan4_flush_cmdq_reorder
|
||||
* get_cmdq->cmd_flush == elan4_flush_cmdq_reorder
|
||||
*/
|
||||
|
||||
putget->pg_cmdStream = malloc(PAGESIZE);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (putget->pg_cmdStream, NULL, OMPI_ERROR, 0);
|
||||
|
||||
@ -458,7 +450,7 @@ ompi_init_elan_putget (mca_ptl_elan_component_t * emp,
|
||||
|
||||
putget->pg_cpool = elan4_allocCookiePool(ctx, ptl->elan_vp);
|
||||
|
||||
ompi_ptl_elan_init_putget_ctrl (ptl, rail, putget, 0, 2, 32);
|
||||
ompi_ptl_elan_init_putget_ctrl (ptl, rail, putget, 0, 8, 32);
|
||||
}
|
||||
|
||||
END_FUNC(PTL_ELAN_DEBUG_INIT);
|
||||
|
@ -108,7 +108,6 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr,
|
||||
}
|
||||
|
||||
if (ompi_using_threads ()) {
|
||||
|
||||
ompi_mutex_lock(&flist->fl_lock);
|
||||
item = ompi_list_remove_first (&((flist)->super));
|
||||
|
||||
@ -122,13 +121,27 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr,
|
||||
}
|
||||
ompi_mutex_unlock(&flist->fl_lock);
|
||||
} else {
|
||||
if (MCA_PTL_ELAN_DESC_QDMA == desc_type )
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
||||
"before list %p length %d item %p\n",
|
||||
flist, flist->super.ompi_list_length,
|
||||
item);
|
||||
|
||||
item = ompi_list_remove_first (&((flist)->super));
|
||||
|
||||
if (MCA_PTL_ELAN_DESC_QDMA == desc_type )
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
||||
"after list %p length %d item %p\n",
|
||||
flist, flist->super.ompi_list_length,
|
||||
item);
|
||||
|
||||
/* Progress this PTL module to get back a descriptor,
|
||||
* Is it OK to progress with ptl->ptl_send_progress()? */
|
||||
while (NULL == item) {
|
||||
mca_ptl_tstamp_t tstamp = 0;
|
||||
|
||||
/*LOG_PRINT(PTL_ELAN_DEBUG_ACK, "Warning: no more
|
||||
* descriptors\n");*/
|
||||
/* XXX:
|
||||
* Well, this still does not trigger the progress on
|
||||
* PTL's from other modules. Wait for PML to change.
|
||||
@ -160,25 +173,19 @@ mca_ptl_elan_send_desc_done (
|
||||
mca_pml_base_send_request_t *req)
|
||||
{
|
||||
mca_ptl_elan_module_t *ptl;
|
||||
ompi_ptl_elan_queue_ctrl_t *queue;
|
||||
mca_ptl_base_header_t *header;
|
||||
|
||||
START_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
ptl = ((ompi_ptl_elan_qdma_desc_t *)desc->desc)->ptl;
|
||||
header = &desc->frag_base.frag_header;
|
||||
queue = ptl->queue;
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_SEND) {
|
||||
char hostname[32];
|
||||
gethostname(hostname, 32);
|
||||
|
||||
fprintf(stderr, "req %p flag %d, length %d\n",
|
||||
req,
|
||||
header->hdr_common.hdr_flags,
|
||||
header->hdr_frag.hdr_frag_length);
|
||||
}
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
||||
"list %p length %d\n",
|
||||
&ptl->queue->tx_desc_free,
|
||||
ptl->queue->tx_desc_free.super.ompi_list_length);
|
||||
|
||||
if(NULL == req) { /* An ack descriptor */
|
||||
OMPI_FREE_LIST_RETURN (&queue->tx_desc_free,
|
||||
OMPI_FREE_LIST_RETURN (&ptl->queue->tx_desc_free,
|
||||
(ompi_list_item_t *) desc);
|
||||
}
|
||||
#if 1
|
||||
@ -186,16 +193,49 @@ mca_ptl_elan_send_desc_done (
|
||||
& MCA_PTL_FLAGS_ACK_MATCHED)
|
||||
|| mca_pml_base_send_request_matched(req)) {
|
||||
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
||||
"returning req %p mpi_done %d pml_done %d \n",
|
||||
req,
|
||||
req->req_base.req_mpi_done,
|
||||
req->req_base.req_pml_done);
|
||||
|
||||
|
||||
if(fetchNset (&desc->frag_progressed, 1) == 0) {
|
||||
ptl->super.ptl_send_progress(ptl, req,
|
||||
header->hdr_frag.hdr_frag_length);
|
||||
}
|
||||
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
||||
"returning req %p mpi_done %d pml_done %d \n",
|
||||
req,
|
||||
req->req_base.req_mpi_done,
|
||||
req->req_base.req_pml_done);
|
||||
|
||||
/* Return a frag or if not cached, or it is a follow up */
|
||||
if((header->hdr_frag.hdr_frag_offset != 0) || (desc->desc->desc_status
|
||||
!= MCA_PTL_ELAN_DESC_CACHED))
|
||||
OMPI_FREE_LIST_RETURN (&queue->tx_desc_free,
|
||||
(ompi_list_item_t *) desc);
|
||||
if (
|
||||
/*(header->hdr_frag.hdr_frag_offset != 0) || */
|
||||
(desc->desc->desc_status != MCA_PTL_ELAN_DESC_CACHED)){
|
||||
if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_PUT) {
|
||||
OMPI_FREE_LIST_RETURN (&ptl->putget->put_desc_free,
|
||||
(ompi_list_item_t *) desc);
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
||||
"list %p length %d\n",
|
||||
&ptl->putget->put_desc_free,
|
||||
ptl->putget->put_desc_free.super.ompi_list_length);
|
||||
} else {
|
||||
OMPI_FREE_LIST_RETURN (&ptl->queue->tx_desc_free,
|
||||
(ompi_list_item_t *) desc);
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
||||
"list %p length %d\n",
|
||||
&ptl->queue->tx_desc_free,
|
||||
ptl->queue->tx_desc_free.super.ompi_list_length);
|
||||
}
|
||||
} else {
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
||||
"PML return frag to list %p, length %d\n",
|
||||
&ptl->queue->tx_desc_free,
|
||||
ptl->queue->tx_desc_free.super.ompi_list_length);
|
||||
}
|
||||
}
|
||||
#else
|
||||
else {
|
||||
@ -228,6 +268,13 @@ mca_ptl_elan_send_desc_done (
|
||||
(ompi_list_item_t *) desc);
|
||||
}
|
||||
#endif
|
||||
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
||||
"list %p length %d\n",
|
||||
&ptl->queue->tx_desc_free,
|
||||
ptl->queue->tx_desc_free.super.ompi_list_length);
|
||||
|
||||
END_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -141,27 +141,22 @@ mca_ptl_elan_ctrl_frag (struct mca_ptl_elan_module_t *ptl,
|
||||
START_FUNC(PTL_ELAN_DEBUG_ACK);
|
||||
|
||||
desc = (mca_ptl_elan_send_frag_t*) header->hdr_ack.hdr_src_ptr.pval;
|
||||
req = (mca_pml_base_send_request_t *) desc->desc->req;
|
||||
req = (mca_pml_base_send_request_t *) desc->desc->req;
|
||||
|
||||
req->req_peer_match = header->hdr_ack.hdr_dst_match;
|
||||
req->req_peer_addr = header->hdr_ack.hdr_dst_addr;
|
||||
req->req_peer_size = header->hdr_ack.hdr_dst_size;
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_ACK) {
|
||||
char hostname[32];
|
||||
gethostname(hostname, 32);
|
||||
fprintf(stderr, "[%s][%s:%d] remote req %p addr %p, length %d\n",
|
||||
hostname, __FUNCTION__, __LINE__,
|
||||
req->req_peer_match.pval,
|
||||
req->req_peer_addr.pval,
|
||||
req->req_peer_size);
|
||||
}
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK, "remote req %p addr %p, length %d\n",
|
||||
req->req_peer_match.pval,
|
||||
req->req_peer_addr.pval,
|
||||
req->req_peer_size);
|
||||
|
||||
/* FIXME:
|
||||
* This sort of synchronized fragment release will lead
|
||||
* to race conditions; also see the note inside the following routine */
|
||||
mca_ptl_elan_send_desc_done (desc, req);
|
||||
END_FUNC(PTL_ELAN_DEBUG_RECV);
|
||||
END_FUNC(PTL_ELAN_DEBUG_ACK);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -182,7 +177,7 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
mca_ptl_base_header_t *hdr;
|
||||
struct ompi_ptl_elan_qdma_desc_t * desc;
|
||||
|
||||
START_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
START_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
|
||||
desc = (ompi_ptl_elan_qdma_desc_t *)frag->desc;
|
||||
destvp = ptl_peer->peer_vp;
|
||||
@ -220,13 +215,9 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
header_length = sizeof (mca_ptl_base_frag_header_t);
|
||||
}
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_SEND) {
|
||||
char hostname[32]; gethostname(hostname, 32);
|
||||
fprintf(stderr, "[%s:%s:%d] frag %p req %p \n",
|
||||
hostname, __FUNCTION__, __LINE__,
|
||||
hdr->hdr_frag.hdr_src_ptr.pval,
|
||||
hdr->hdr_frag.hdr_dst_ptr.pval);
|
||||
}
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_SEND, "frag %p req %p \n",
|
||||
hdr->hdr_frag.hdr_src_ptr.pval,
|
||||
hdr->hdr_frag.hdr_dst_ptr.pval);
|
||||
|
||||
/* initialize convertor */
|
||||
if(size_in > 0) {
|
||||
@ -279,12 +270,10 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
size_out),
|
||||
DMA_DataTypeByte,
|
||||
DMA_QueueWrite, 16);
|
||||
desc->main_dma.dma_cookie = elan4_local_cookie (ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||
|
||||
desc->main_dma.dma_cookie =
|
||||
elan4_local_cookie (ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_SEND) {
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_MAC) {
|
||||
char hostname[32];
|
||||
|
||||
gethostname(hostname, 32);
|
||||
@ -298,7 +287,7 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
END_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
END_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -350,22 +339,17 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
desc->dst_elan_addr = (E4_Addr)pml_req->req_peer_addr.lval;
|
||||
desc->desc_buff = hdr;
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_PUT) {
|
||||
char hostname[32];
|
||||
gethostname(hostname, 32);
|
||||
fprintf(stderr, "[%s][%s:%d] remote req %p addr %x, length %d\n",
|
||||
hostname, __FUNCTION__, __LINE__,
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_PUT, " remote req %p addr %x, length %d\n",
|
||||
pml_req->req_peer_match.pval,
|
||||
pml_req->req_peer_addr.lval,
|
||||
pml_req->req_peer_size);
|
||||
}
|
||||
|
||||
/* initialize convertor */
|
||||
/* FIXME: initialize convertor and get the fragment copied out */
|
||||
if(size_in > 0 && 0) {
|
||||
struct iovec iov;
|
||||
ompi_convertor_t *convertor;
|
||||
|
||||
if( offset <= mca_ptl_elan_module.super.ptl_first_frag_size ) {
|
||||
if( offset <= mca_ptl_elan_module.super.ptl_first_frag_size) {
|
||||
convertor = &pml_req->req_convertor;
|
||||
} else {
|
||||
convertor = &frag->frag_base.frag_convertor;
|
||||
@ -379,9 +363,11 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
offset);
|
||||
}
|
||||
|
||||
/* For now, eager sends are always packed into the descriptor
|
||||
* TODO: Inline up to 256 bytes (including the header), then
|
||||
* do a chained send for mesg < first_frag_size */
|
||||
/*
|
||||
* TODO:
|
||||
* For now, eager sends are always packed into the descriptor
|
||||
* Inline up to 256 bytes (including the header), then
|
||||
* do a chained send for mesg < first_frag_size */
|
||||
|
||||
/*desc->src_elan_addr = elan4_main2elan(ctx, desc->desc_buff);*/
|
||||
iov.iov_base = desc->desc_buff;
|
||||
@ -401,13 +387,10 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
*size = size_out;
|
||||
hdr->hdr_frag.hdr_frag_length = size_out;
|
||||
|
||||
/* XXX: no additional flags for the DMA, remote, shmem, qwrite,
|
||||
* broadcast, etc */
|
||||
flags = 0;
|
||||
|
||||
/*
|
||||
* FIXME:
|
||||
* Be sure to correctly setup a chained DMA.
|
||||
/* FIXME: no additional flags for the DMA, remote, shmem, qwrite,
|
||||
* broadcast, etc. Be sure to correctly setup a chained DMA.
|
||||
*/
|
||||
|
||||
/* Setup the chain dma */
|
||||
@ -458,14 +441,13 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
* Allocate space from the command queues hung off the CTX.
|
||||
*/
|
||||
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
|
||||
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
||||
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
|
||||
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
|
||||
|
||||
/* Chain an event */
|
||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx,
|
||||
(E4_Event *)desc->chain_event);
|
||||
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
|
||||
|
||||
/* XXX: Hardcoded DMA retry count */
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (size_out,
|
||||
@ -478,7 +460,7 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
destvp);
|
||||
desc->main_dma.dma_vproc = destvp;
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_PUT) {
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_MAC) {
|
||||
char hostname[32];
|
||||
gethostname(hostname, 32);
|
||||
fprintf(stderr, "[%s send...] destvp %d type %d flag %d size %d\n",
|
||||
@ -489,7 +471,7 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
END_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
END_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
}
|
||||
|
||||
int
|
||||
@ -504,13 +486,12 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc,
|
||||
|
||||
ptl = ptl_peer->peer_ptl;
|
||||
|
||||
START_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
START_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
|
||||
if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_QDMA) {
|
||||
struct ompi_ptl_elan_qdma_desc_t *qdma;
|
||||
|
||||
qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc;
|
||||
|
||||
mca_ptl_elan_init_qdma_desc (desc, ptl, ptl_peer, sendreq,
|
||||
offset, size, flags);
|
||||
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma);
|
||||
@ -529,9 +510,7 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc,
|
||||
/* For each put/get descriptor, a QDMA is chained off. */
|
||||
mca_ptl_elan_init_putget_desc (desc, ptl, ptl_peer, sendreq,
|
||||
offset, size, flags);
|
||||
|
||||
elan4_run_dma_cmd (ptl->putget->put_cmdq, (E4_DMA *) &pdesc->main_dma);
|
||||
|
||||
/*ptl->queue->tx_cmdq->cmdq_flush */
|
||||
elan4_flush_cmdq_reorder (ptl->putget->put_cmdq);
|
||||
|
||||
@ -552,7 +531,7 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc,
|
||||
desc->frag_progressed = 0;
|
||||
desc->frag_ack_pending = 0; /* this is ack for internal elan */
|
||||
|
||||
END_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
END_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -577,9 +556,7 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
||||
elan_ptl = (mca_ptl_elan_module_t *) ptl;
|
||||
desc->desc->desc_type = MCA_PTL_ELAN_DESC_QDMA;
|
||||
qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc;
|
||||
|
||||
hdr = (mca_ptl_base_header_t *) & qdma->buff[0];
|
||||
|
||||
request = recv_frag->frag_recv.frag_request;
|
||||
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK;
|
||||
@ -608,12 +585,13 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_ACK) {
|
||||
char hostname[32]; gethostname(hostname, 32);
|
||||
fprintf(stderr, "[%s:%s:%d] remote frag %p local req %p buffer %p size %d \n",
|
||||
hostname, __FUNCTION__, __LINE__,
|
||||
hdr->hdr_ack.hdr_src_ptr.pval,
|
||||
hdr->hdr_ack.hdr_dst_match.pval,
|
||||
hdr->hdr_ack.hdr_dst_addr.pval,
|
||||
hdr->hdr_ack.hdr_dst_size);
|
||||
fprintf(stderr,
|
||||
"[%s:%s:%d] remote frag %p local req %p buffer %p size %d \n",
|
||||
hostname, __FUNCTION__, __LINE__,
|
||||
hdr->hdr_ack.hdr_src_ptr.pval,
|
||||
hdr->hdr_ack.hdr_dst_match.pval,
|
||||
hdr->hdr_ack.hdr_dst_addr.pval,
|
||||
hdr->hdr_ack.hdr_dst_size);
|
||||
}
|
||||
|
||||
/* Filling up QDMA descriptor */
|
||||
@ -631,10 +609,7 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
|
||||
elan4_run_dma_cmd (elan_ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma);
|
||||
|
||||
/*ptl->queue->tx_cmdq->cmdq_flush */
|
||||
elan4_flush_cmdq_reorder (elan_ptl->queue->tx_cmdq);
|
||||
|
||||
/* Insert desc into the list of outstanding DMA's */
|
||||
@ -665,7 +640,7 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp)
|
||||
int i;
|
||||
int rc;
|
||||
|
||||
START_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
START_FUNC(PTL_ELAN_DEBUG_RECV);
|
||||
num_ptl_modules = emp->elan_num_ptl_modules;
|
||||
|
||||
/* Iterate over all the PTL input Queues */
|
||||
@ -675,22 +650,18 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp)
|
||||
queue = emp->elan_ptl_modules[i]->queue;
|
||||
rxq = queue->rxq;
|
||||
ctx = ptl->ptl_elan_ctx;
|
||||
|
||||
OMPI_LOCK (&queue->rx_lock);
|
||||
|
||||
#if 1
|
||||
rc = (*(int *) (&rxq->qr_doneWord));
|
||||
#else
|
||||
rc = elan4_pollevent_word (ctx, &rxq->qr_doneWord, 2000);
|
||||
rc = elan4_pollevent_word (ctx, &rxq->qr_doneWord, 1);
|
||||
#endif
|
||||
|
||||
if (rc) {
|
||||
|
||||
mca_ptl_base_header_t *header;
|
||||
|
||||
header = (mca_ptl_base_header_t *) rxq->qr_fptr;
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_RECV) {
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_MAC) {
|
||||
char hostname[32];
|
||||
gethostname(hostname, 32);
|
||||
|
||||
@ -719,8 +690,8 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp)
|
||||
break;
|
||||
default:
|
||||
fprintf(stdout, "[%s:%d] unknow fragment type %d\n",
|
||||
__FILE__, __LINE__,
|
||||
header->hdr_common.hdr_type);
|
||||
__FILE__, __LINE__,
|
||||
header->hdr_common.hdr_type);
|
||||
fflush(stdout);
|
||||
break;
|
||||
}
|
||||
@ -735,13 +706,10 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp)
|
||||
rxq->qr_efptr += queue->rx_slotsize;
|
||||
}
|
||||
|
||||
/* PCI Write */
|
||||
/* PCI Write, Reset the event
|
||||
* Order RESETEVENT wrt to wait_event_cmd */
|
||||
queue->input->q_fptr = rxq->qr_efptr;
|
||||
|
||||
/* Reset the event */
|
||||
RESETEVENT_WORD (&rxq->qr_doneWord);
|
||||
|
||||
/* Order RESETEVENT wrt to wait_event_cmd */
|
||||
MEMBAR_STORESTORE ();
|
||||
|
||||
/* Re-prime queue event by issuing a waitevent(1) on it */
|
||||
@ -752,15 +720,12 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp)
|
||||
E4_EVENT_DTYPE_LONG, 0),
|
||||
MAIN2ELAN (ctx, (void *) &rxq->qr_doneWord),
|
||||
0xfeedfacedeadbeef);
|
||||
|
||||
/*rxq->qr_cmdq->cmdq_flush */
|
||||
elan4_flush_cmdq_reorder (rxq->qr_cmdq);
|
||||
|
||||
}
|
||||
OMPI_UNLOCK (&queue->rx_lock);
|
||||
}
|
||||
|
||||
END_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
END_FUNC(PTL_ELAN_DEBUG_RECV);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -776,8 +741,6 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
|
||||
int i;
|
||||
int rc = 0;
|
||||
|
||||
START_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
|
||||
num_ptl_modules = emp->elan_num_ptl_modules;
|
||||
|
||||
/* Update the send request if any of send's is completed */
|
||||
@ -794,7 +757,7 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
|
||||
rc = * ((int *) (&desc->desc->main_doneWord));
|
||||
#else
|
||||
/* Poll the completion event for 1usec */
|
||||
rc = elan4_pollevent_word(ctx, &desc->desc->main_doneWord, 2000);
|
||||
rc = elan4_pollevent_word(ctx, &desc->desc->main_doneWord, 1);
|
||||
#endif
|
||||
if (rc) {
|
||||
mca_ptl_base_header_t *header;
|
||||
@ -805,28 +768,20 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
|
||||
desc = (mca_ptl_elan_send_frag_t *)
|
||||
ompi_list_remove_first (&queue->tx_desc);
|
||||
qdma = (ompi_ptl_elan_qdma_desc_t*)desc->desc;
|
||||
|
||||
req = (mca_ptl_elan_send_request_t *)qdma->req;
|
||||
header = (mca_ptl_base_header_t *)&qdma->buff[0];
|
||||
header = (mca_ptl_base_header_t *)&qdma->buff[0];
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_SEND) {
|
||||
char hostname[32];
|
||||
gethostname(hostname, 32);
|
||||
|
||||
fprintf(stderr,
|
||||
"[%s comp sending...] type %d flag %d size %d\n",
|
||||
hostname,
|
||||
header->hdr_common.hdr_type,
|
||||
header->hdr_common.hdr_flags,
|
||||
header->hdr_common.hdr_size);
|
||||
}
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_MAC,
|
||||
"[comp send] type %d flag %d size %d\n",
|
||||
header->hdr_common.hdr_type,
|
||||
header->hdr_common.hdr_flags,
|
||||
header->hdr_common.hdr_size);
|
||||
mca_ptl_elan_send_desc_done (desc, req);
|
||||
|
||||
/* Remember to reset the events */
|
||||
INITEVENT_WORD (ctx, qdma->elan_event, &qdma->main_doneWord);
|
||||
RESETEVENT_WORD (&qdma->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx, qdma->elan_event, 1);
|
||||
|
||||
} else {
|
||||
/* XXX: Stop at any incomplete send desc */
|
||||
break;
|
||||
@ -837,7 +792,6 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
|
||||
/* Have the putget list checking to be in the same function */
|
||||
mca_ptl_elan_update_putget(emp);
|
||||
|
||||
END_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -853,8 +807,6 @@ mca_ptl_elan_update_putget (mca_ptl_elan_component_t * emp)
|
||||
int i;
|
||||
int rc = 0;
|
||||
|
||||
START_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
|
||||
num_ptl_modules = emp->elan_num_ptl_modules;
|
||||
|
||||
/* Update the send request if any of send's is completed */
|
||||
@ -867,11 +819,11 @@ mca_ptl_elan_update_putget (mca_ptl_elan_component_t * emp)
|
||||
while (ompi_list_get_size (&putget->put_desc) > 0) {
|
||||
desc = (mca_ptl_elan_send_frag_t *)
|
||||
ompi_list_get_first (&putget->put_desc);
|
||||
#if 0
|
||||
#if 1
|
||||
rc = * ((int *) (&desc->desc->main_doneWord));
|
||||
#else
|
||||
/* Poll the completion event for 1usec */
|
||||
rc = elan4_pollevent_word(ctx, &desc->desc->main_doneWord, 2000);
|
||||
rc = elan4_pollevent_word(ctx, &desc->desc->main_doneWord, 1);
|
||||
#endif
|
||||
if (rc) {
|
||||
mca_ptl_base_header_t *header;
|
||||
@ -883,27 +835,20 @@ mca_ptl_elan_update_putget (mca_ptl_elan_component_t * emp)
|
||||
desc = (mca_ptl_elan_send_frag_t *)
|
||||
ompi_list_remove_first (&putget->put_desc);
|
||||
pdesc = (ompi_ptl_elan_putget_desc_t*)desc->desc;
|
||||
req = (mca_ptl_elan_send_request_t *)pdesc->req;
|
||||
header= (mca_ptl_base_header_t *)pdesc->desc_buff;
|
||||
|
||||
req = (mca_ptl_elan_send_request_t *)pdesc->req;
|
||||
header = (mca_ptl_base_header_t *)pdesc->desc_buff;
|
||||
|
||||
if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_PUT) {
|
||||
char hostname[32];
|
||||
gethostname(hostname, 32);
|
||||
|
||||
fprintf(stderr,
|
||||
"[%s comp sending...] type %d flag %d size %d\n",
|
||||
hostname,
|
||||
header->hdr_common.hdr_type,
|
||||
header->hdr_common.hdr_flags,
|
||||
header->hdr_common.hdr_size);
|
||||
}
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_MAC,
|
||||
"[comp put] type %d flag %d size %d\n",
|
||||
header->hdr_common.hdr_type,
|
||||
header->hdr_common.hdr_flags,
|
||||
header->hdr_common.hdr_size);
|
||||
|
||||
/*mca_ptl_elan_send_desc_done (desc, req);*/
|
||||
OMPI_FREE_LIST_RETURN (&putget->put_desc_free,
|
||||
(ompi_list_item_t *) desc);
|
||||
/* Remember to reset the events */
|
||||
|
||||
/* Remember to reset the events */
|
||||
INITEVENT_WORD (ctx, pdesc->elan_event, &pdesc->main_doneWord);
|
||||
RESETEVENT_WORD (&pdesc->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx, pdesc->elan_event, 1);
|
||||
@ -911,7 +856,6 @@ mca_ptl_elan_update_putget (mca_ptl_elan_component_t * emp)
|
||||
} /* end of the while loop */
|
||||
} /* end of the for loop */
|
||||
|
||||
END_FUNC(PTL_ELAN_DEBUG_NONE);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -47,28 +47,31 @@
|
||||
#define PTL_ELAN_DEBUG_SEND (0x010)
|
||||
#define PTL_ELAN_DEBUG_RECV (0x020)
|
||||
#define PTL_ELAN_DEBUG_ACK (0x040)
|
||||
#define PTL_ELAN_DEBUG_PROG (0x080)
|
||||
#define PTL_ELAN_DEBUG_MAC (0x080)
|
||||
|
||||
#define PTL_ELAN_DEBUG_QDMA (0x100)
|
||||
#define PTL_ELAN_DEBUG_PUT (0x200)
|
||||
#define PTL_ELAN_DEBUG_GET (0x400)
|
||||
#define PTL_ELAN_DEBUG_CHAIN (0x800)
|
||||
|
||||
#define OMPI_PTL_ELAN_MAX_QSLOTS (128)
|
||||
#define OMPI_PTL_ELAN_MAX_QDESCS (128)
|
||||
#define OMPI_PTL_ELAN_NUM_QDESCS (4)
|
||||
|
||||
/* For now only debug send's */
|
||||
#if 1
|
||||
#if 0
|
||||
#define PTL_ELAN_DEBUG_FLAG PTL_ELAN_DEBUG_NONE
|
||||
#else
|
||||
#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_ACK \
|
||||
| PTL_ELAN_DEBUG_SEND | PTL_ELAN_DEBUG_PUT | PTL_ELAN_DEBUG_RECV)
|
||||
#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_MAC|PTL_ELAN_DEBUG_ACK|PTL_ELAN_DEBUG_SEND|PTL_ELAN_DEBUG_PUT)
|
||||
#endif
|
||||
|
||||
#define LOG_PRINT(flag, args...) \
|
||||
do { \
|
||||
if (PTL_ELAN_DEBUG_FLAG & flag) { \
|
||||
char hostname[32]; gethostname(hostname, 32); \
|
||||
fprintf(stderr, "[%s:%s:%d] ", \
|
||||
fprintf(stdout, "[%s:%s:%d] ", \
|
||||
hostname, __FUNCTION__, __LINE__); \
|
||||
fprintf(stderr, args); \
|
||||
fprintf(stdout, args); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
@ -86,7 +89,7 @@ do { \
|
||||
do { \
|
||||
if (PTL_ELAN_DEBUG_FLAG & flag) { \
|
||||
char hostname[32]; gethostname(hostname, 32); \
|
||||
fprintf(stderr, "[%s:%s:%d] Entering ...\n", \
|
||||
fprintf(stdout, "[%s:%s:%d] Entering ...\n", \
|
||||
hostname, __FUNCTION__, __LINE__); \
|
||||
} \
|
||||
} while (0)
|
||||
@ -95,7 +98,7 @@ do { \
|
||||
do { \
|
||||
if (PTL_ELAN_DEBUG_FLAG & flag) { \
|
||||
char hostname[32]; gethostname(hostname, 32); \
|
||||
fprintf(stderr, "[%s:%s:%d] Completes ...\n", \
|
||||
fprintf(stdout, "[%s:%s:%d] Completes ...\n", \
|
||||
hostname, __FUNCTION__, __LINE__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
@ -5,10 +5,10 @@
|
||||
#include "mpi.h"
|
||||
#include "test_util.h"
|
||||
|
||||
#define MYBUFSIZE (4*1024*16)
|
||||
#define MYBUFSIZE (4*1024*1024)
|
||||
char s_buf[MYBUFSIZE];
|
||||
char r_buf[MYBUFSIZE];
|
||||
int skip = 40;
|
||||
int skip = 0;
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
@ -25,11 +25,16 @@ main (int argc, char *argv[])
|
||||
|
||||
struct timeval t_start, t_end;
|
||||
|
||||
loop = 2;
|
||||
|
||||
if (argc < 2) {
|
||||
fprintf (stderr, "Usage: %s msg_size\n", argv[0]);
|
||||
return 0;
|
||||
} else {
|
||||
size = atoi (argv[1]);
|
||||
if (argc > 2)
|
||||
loop = atoi (argv[2]);
|
||||
}
|
||||
size = atoi (argv[1]);
|
||||
|
||||
/* Get some environmental variables set for Open MPI, OOB */
|
||||
env_init_for_elan();
|
||||
@ -43,24 +48,22 @@ main (int argc, char *argv[])
|
||||
s_buf[i] = 'a' + i;
|
||||
}
|
||||
|
||||
loop = 1000;
|
||||
gethostname(hostname, 32);
|
||||
|
||||
fprintf(stdout, "[%s:%s:%d] done with init and barrier\n",
|
||||
hostname, __FUNCTION__, __LINE__);
|
||||
fflush(stdout);
|
||||
|
||||
MPI_Barrier (MPI_COMM_WORLD);
|
||||
|
||||
for (i = 0; i < loop + skip; i++) {
|
||||
if (i == skip)
|
||||
gettimeofday (&t_start, 0);
|
||||
if (myid == 0) {
|
||||
MPI_Send (s_buf, size, MPI_CHAR, 1, i, MPI_COMM_WORLD);
|
||||
MPI_Recv (r_buf, size, MPI_CHAR, 1, i, MPI_COMM_WORLD, &stat);
|
||||
/*MPI_Recv (r_buf, size, MPI_CHAR, 1, i, MPI_COMM_WORLD,
|
||||
* &stat);*/
|
||||
} else {
|
||||
MPI_Recv (r_buf, size, MPI_CHAR, 0, i, MPI_COMM_WORLD, &stat);
|
||||
MPI_Send (s_buf, size, MPI_CHAR, 0, i, MPI_COMM_WORLD);
|
||||
/*MPI_Send (s_buf, size, MPI_CHAR, 0, i, MPI_COMM_WORLD);*/
|
||||
}
|
||||
}
|
||||
gettimeofday (&t_end, 0);
|
||||
@ -77,7 +80,6 @@ main (int argc, char *argv[])
|
||||
size, latency);
|
||||
fflush(stdout);
|
||||
}
|
||||
MPI_Barrier (MPI_COMM_WORLD);
|
||||
MPI_Finalize ();
|
||||
return 0;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ int main (int argc, char ** argv)
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &proc);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
/*MPI_Barrier(MPI_COMM_WORLD);*/
|
||||
fprintf(stdout, "[%s:%s:%d] done with init \n",
|
||||
hostname, __FUNCTION__, __LINE__);
|
||||
fflush(stdout);
|
||||
|
@ -6,10 +6,10 @@ static void env_init_for_elan()
|
||||
char *rms_rank;
|
||||
|
||||
setenv("OMPI_MCA_oob_cofs_dir", "/home/1/yuw/tmp", 1);
|
||||
/*setenv("OMPI_MCA_oob_cofs_dir", "/tmp/COFS", 1);*/
|
||||
setenv("OMPI_MCA_pcm_cofs_cellid", "1", 1);
|
||||
setenv("OMPI_MCA_pcm_cofs_jobid", "1", 1);
|
||||
setenv("OMPI_MCA_pcm_cofs_num_procs", "2", 1);
|
||||
setenv("OMPI_MCA_ptl_base_exclude", "tcp", 1);
|
||||
|
||||
if (NULL != (rms_rank = getenv("RMS_RANK"))) {
|
||||
/* RMS_JOBID:RMS_NNODES:RMS_NPROCS:RMS_NODEID:RMS_RESOURCEID */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user