save lots of type and name changes
This commit was SVN r1933.
Этот коммит содержится в:
родитель
5e22c1edfe
Коммит
944432a352
@ -19,6 +19,10 @@
|
||||
#include "ptl_elan_frag.h"
|
||||
#include "ptl_elan_priv.h"
|
||||
|
||||
#if 0
|
||||
#elif defined(ABC)
|
||||
#endif
|
||||
|
||||
/* XXX: There must be multiple PTL's. This could be the template */
|
||||
mca_ptl_elan_module_t mca_ptl_elan_module = {
|
||||
{
|
||||
@ -312,13 +316,14 @@ mca_ptl_elan_put (struct mca_ptl_base_module_t *ptl,
|
||||
|
||||
int
|
||||
mca_ptl_elan_get (struct mca_ptl_base_module_t *ptl,
|
||||
struct mca_ptl_base_peer_t *ptl_base_peer,
|
||||
struct mca_pml_base_recv_request_t *request,
|
||||
struct mca_ptl_base_peer_t *ptl_peer,
|
||||
struct mca_pml_base_recv_request_t *sendreq,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
int flags)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
#if 0
|
||||
mca_ptl_elan_send_frag_t *desc;
|
||||
|
||||
/* XXX:
|
||||
@ -341,6 +346,7 @@ mca_ptl_elan_get (struct mca_ptl_base_module_t *ptl,
|
||||
|
||||
/* Update all the sends until the put is done */
|
||||
END_FUNC();
|
||||
#endif
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -9,9 +9,11 @@
|
||||
#include "ptl_elan.h"
|
||||
#include "ptl_elan_priv.h"
|
||||
|
||||
#define ELAN_QUEUE_MAX INPUT_QUEUE_MAX
|
||||
#define ELAN_QUEUE_LOST_SLOTS 1
|
||||
#define SLOT_ALIGN 128
|
||||
#define PUTGET_THROTTLE (32)
|
||||
#define ELAN_PTL_FASTPATH (0x1)
|
||||
#define ELAN_QUEUE_MAX (INPUT_QUEUE_MAX)
|
||||
#define ELAN_QUEUE_LOST_SLOTS (1)
|
||||
#define SLOT_ALIGN (128)
|
||||
/* Larger of a and b.  Every use of an argument is parenthesized so that
 * an argument containing an operator of lower precedence than '>' or
 * '?:' (for example MAX(0, x | 1)) is compared as a whole.  Each
 * argument may be evaluated twice, so do not pass expressions with
 * side effects. */
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
/* Round x up to the next multiple of a.  The mask (-(a)) only clears
 * the low bits correctly when a is a power of two.  x is converted to
 * unsigned int before the addition. */
#define ALIGNUP(x,a) (((unsigned int)(x) + ((a)-1)) & (-(a)))
|
||||
|
||||
@ -43,7 +45,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
||||
|
||||
ompi_free_list_t *flist;
|
||||
ompi_ptl_elan_qdma_desc_t *desc;
|
||||
ompi_elan_event_t *elan_ptr;
|
||||
E4_Event *elan_ptr;
|
||||
|
||||
START_FUNC();
|
||||
|
||||
@ -59,7 +61,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
||||
main_align = MAX (sizeof (void *), 8);
|
||||
elan_align = MAX (sizeof (int *), ELAN_BLOCK_ALIGN);
|
||||
main_size = ALIGNUP (sizeof (ompi_ptl_elan_qdma_desc_t), main_align);
|
||||
elan_size = ALIGNUP (sizeof (ompi_elan_event_t), elan_align);
|
||||
elan_size = ALIGNUP (sizeof (E4_Event), elan_align);
|
||||
|
||||
OBJ_CONSTRUCT(&flist->fl_lock, ompi_mutex_t);
|
||||
flist->fl_elem_size = flist->fl_max_to_alloc = 128;
|
||||
@ -80,30 +82,25 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (desc, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/* Allocating elan related structures */
|
||||
elan_ptr = (ompi_elan_event_t *) elan4_allocElan (rail->r_alloc,
|
||||
elan_align,
|
||||
elan_size * count);
|
||||
elan_ptr = (E4_Event *) elan4_allocElan (rail->r_alloc,
|
||||
elan_align, elan_size * count);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0);
|
||||
|
||||
for (i = 0; i < flist->fl_num_per_alloc; i++) {
|
||||
ompi_list_item_t *item;
|
||||
|
||||
desc->rail = rail;
|
||||
desc->ptl = ptl;
|
||||
desc->elan_data_event = elan_ptr;
|
||||
desc->elan_event = elan_ptr;
|
||||
frag->desc = (ompi_ptl_elan_base_desc_t *)desc;
|
||||
|
||||
/* Initialize some of the dma structures */
|
||||
{
|
||||
desc->main_dma.dma_dstAddr = 0;
|
||||
desc->main_dma.dma_srcEvent =
|
||||
SDRAM2ELAN (ctx, &desc->elan_data_event->event32);
|
||||
desc->main_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event);
|
||||
desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input);
|
||||
INITEVENT_WORD (ctx, (EVENT *) & desc->elan_data_event->event32,
|
||||
&desc->main_doneWord);
|
||||
INITEVENT_WORD (ctx, desc->elan_event, &desc->main_doneWord);
|
||||
RESETEVENT_WORD (&desc->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx,
|
||||
(EVENT *) & desc->elan_data_event->event32, 1);
|
||||
PRIMEEVENT_WORD (ctx, desc->elan_event, 1);
|
||||
}
|
||||
|
||||
item = (ompi_list_item_t *) frag;
|
||||
@ -111,7 +108,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
||||
|
||||
/* Progress to the next element */
|
||||
desc = (ompi_ptl_elan_qdma_desc_t *) ((char *) desc + main_size);
|
||||
elan_ptr = (ompi_elan_event_t *) ((char *) elan_ptr + elan_size);
|
||||
elan_ptr = (E4_Event *) ((char *) elan_ptr + elan_size);
|
||||
frag ++;
|
||||
}
|
||||
flist->fl_num_allocated += flist->fl_num_per_alloc;
|
||||
@ -132,9 +129,9 @@ mca_ptl_elan_putget_desc_contruct (
|
||||
/* Zero this descriptor */
|
||||
memset(desc, 0, sizeof(desc));
|
||||
|
||||
desc->dma_typeSize = 0;
|
||||
desc->dma_cookie = 0;
|
||||
desc->dma_vproc = 0;
|
||||
desc->main_dma.dma_typeSize = 0;
|
||||
desc->main_dma.dma_cookie = 0;
|
||||
desc->main_dma.dma_vproc = 0;
|
||||
|
||||
/* Remember all the address needs to be converted
|
||||
* before assigning to DMA descriptor */
|
||||
@ -155,26 +152,26 @@ mca_ptl_elan_putget_desc_contruct (
|
||||
mb();
|
||||
}
|
||||
|
||||
#define OMPI_ELAN_DESC_LIST(ctx, flist, frag, desc, eptr, msize, esize, local)\
|
||||
#define OMPI_ELAN_DESC_LIST(ctx, flist, frag, dp, eptr, msize, esize, local)\
|
||||
do { \
|
||||
int i; \
|
||||
for (i = 0; i < flist->fl_num_per_alloc; i++) { \
|
||||
ompi_list_item_t *item; \
|
||||
\
|
||||
desc->elan_data_event = eptr; \
|
||||
frag->desc = (ompi_ptl_elan_base_desc_t *)desc; \
|
||||
dp->elan_event = eptr; \
|
||||
frag->desc = (ompi_ptl_elan_base_desc_t *)dp; \
|
||||
\
|
||||
/* Initialize some of the dma structures */ \
|
||||
mca_ptl_elan_putget_desc_contruct (ctx, desc, \
|
||||
mca_ptl_elan_putget_desc_contruct (ctx, dp, \
|
||||
eptr, 0, 0, local); \
|
||||
\
|
||||
item = (ompi_list_item_t *) frag; \
|
||||
ompi_list_append (&flist->super, item); \
|
||||
\
|
||||
/* Progress to the next element */ \
|
||||
desc = (ompi_ptl_elan_putget_desc_t *) \
|
||||
((char *)desc + msize); \
|
||||
eptr = (ompi_elan_event_t *) ((char *) eptr + esize); \
|
||||
dp= (ompi_ptl_elan_putget_desc_t *) \
|
||||
((char *)dp + msize); \
|
||||
eptr = (E4_Event *) ((char *) eptr + esize); \
|
||||
frag ++; \
|
||||
} \
|
||||
flist->fl_num_allocated += flist->fl_num_per_alloc; \
|
||||
@ -187,15 +184,13 @@ ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl,
|
||||
ompi_ptl_elan_putget_ctrl_t * putget,
|
||||
int init_num, int inc_num, int max_num)
|
||||
{
|
||||
int i;
|
||||
int main_size;
|
||||
int main_align;
|
||||
int elan_size;
|
||||
int elan_align;
|
||||
|
||||
RAIL *rail;
|
||||
ELAN4_CTX *ctx;
|
||||
ompi_elan_event_t *elan_ptr;
|
||||
E4_Event *elan_ptr;
|
||||
mca_ptl_elan_send_frag_t *frag;
|
||||
ompi_free_list_t *put_list, *get_list;
|
||||
ompi_ptl_elan_putget_desc_t *put_desc, *get_desc;
|
||||
@ -205,7 +200,7 @@ ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl,
|
||||
main_align = MAX (sizeof (void *), ELAN_ALIGN);
|
||||
elan_align = MAX (sizeof (int *), ELAN_BLOCK_ALIGN);
|
||||
main_size = ALIGNUP(sizeof(ompi_ptl_elan_putget_desc_t), main_align);
|
||||
elan_size = ALIGNUP(sizeof(ompi_elan_event_t), elan_align);
|
||||
elan_size = ALIGNUP(sizeof(E4_Event), elan_align);
|
||||
|
||||
rail = (RAIL *) ptl->ptl_elan_rail;
|
||||
ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;
|
||||
@ -222,15 +217,15 @@ ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl,
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (frag, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/* Allocating elan related structures */
|
||||
elan_ptr = (ompi_elan_event_t *) elan4_allocElan (rail->r_alloc,
|
||||
elan_ptr = (E4_Event *) elan4_allocElan (rail->r_alloc,
|
||||
elan_align, elan_size * inc_num);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0);
|
||||
|
||||
put_desc = (ompi_ptl_elan_putget_desc_t *) elan4_allocMain (
|
||||
rail->r_alloc, main_align, main_size * inc_num);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (put_desc, NULL, OMPI_ERROR, 0);
|
||||
OMPI_PTL_ELAN_GROW_DESC_LIST(ctx, put_list, frag,
|
||||
put_desc, elan_ptr, main_size, elan_size, 1)
|
||||
OMPI_ELAN_DESC_LIST(ctx, put_list, frag, put_desc, elan_ptr,
|
||||
main_size, elan_size, 1);
|
||||
|
||||
OBJ_CONSTRUCT (&putget->get_desc, ompi_list_t);
|
||||
OBJ_CONSTRUCT (&putget->get_desc_free, ompi_free_list_t);
|
||||
@ -243,15 +238,15 @@ ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl,
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (frag, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/* Allocating elan related structures */
|
||||
elan_ptr = (ompi_elan_event_t *) elan4_allocElan (rail->r_alloc,
|
||||
elan_ptr = (E4_Event *) elan4_allocElan (rail->r_alloc,
|
||||
elan_align, elan_size * inc_num);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0);
|
||||
|
||||
get_desc = (ompi_ptl_elan_putget_desc_t *) elan4_allocMain (
|
||||
rail->r_alloc, main_align, main_size * inc_num);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (get_desc, NULL, OMPI_ERROR, 0);
|
||||
OMPI_PTL_ELAN_GROW_DESC_LIST(ctx, get_list, frag,
|
||||
get_desc, elan_ptr, main_size, elan_size, 0)
|
||||
OMPI_ELAN_DESC_LIST(ctx, get_list, frag, get_desc, elan_ptr,
|
||||
main_size, elan_size, 0);
|
||||
|
||||
END_FUNC();
|
||||
return OMPI_SUCCESS;
|
||||
@ -422,7 +417,7 @@ ompi_init_elan_putget (mca_ptl_elan_component_t * emp,
|
||||
memset (putget, 0, sizeof (ompi_ptl_elan_putget_ctrl_t));
|
||||
|
||||
putget->pg_throttle = PUTGET_THROTTLE;
|
||||
putget->pg_flags = ELAN_PUT_FASTPATH;
|
||||
putget->pg_flags = ELAN_PTL_FASTPATH;
|
||||
putget->pg_retryCount = 16;
|
||||
putget->pg_evictCache = TRUE;
|
||||
putget->pg_waitType = ELAN_POLL_EVENT;
|
||||
@ -430,9 +425,10 @@ ompi_init_elan_putget (mca_ptl_elan_component_t * emp,
|
||||
/* construct the lock variable */
|
||||
OBJ_CONSTRUCT (&putget->pg_lock, ompi_mutex_t);
|
||||
|
||||
*cqp = elan4_probe_cmdq(ctx, rail->r_alloc, 0x10, CQ_AutoCtrlFlowOn);
|
||||
cqp = elan4_probe_cmdq(ctx, rail->r_alloc, 0x10, CQ_AutoCtrlFlowOn);
|
||||
|
||||
putget->put_cmdq = elan4_alloc_cmdq(ctx,
|
||||
rail>r_alloc,
|
||||
rail->r_alloc,
|
||||
CQ_Size8K,
|
||||
CQ_WriteEnableBit |
|
||||
CQ_DmaStartEnableBit |
|
||||
@ -456,14 +452,14 @@ ompi_init_elan_putget (mca_ptl_elan_component_t * emp,
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (putget->pg_cmdStream, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/* Allocate a per vp counter to throttle outstanding get DMAs */
|
||||
putget->pg_pendingGetCount = malloc(sizeof(u_int)*state->nvp);
|
||||
putget->pg_pendingGetCount = malloc(sizeof(u_int)*ptl->elan_nvp);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (putget->pg_pendingGetCount,
|
||||
NULL, OMPI_ERROR, 0);
|
||||
memset(putget->pg_pendingGetCount, 0, sizeof(u_int)*state->nvp);
|
||||
memset(putget->pg_pendingGetCount, 0, sizeof(u_int)*ptl->elan_nvp);
|
||||
|
||||
putget->pg_cpool = elan4_allocCookiePool(ctx, state->vp);
|
||||
putget->pg_cpool = elan4_allocCookiePool(ctx, ptl->elan_vp);
|
||||
|
||||
ompi_ptl_elan_init_putget_ctrl (ptl, rail, putget, 0, 16, 32)
|
||||
ompi_ptl_elan_init_putget_ctrl (ptl, rail, putget, 0, 16, 32);
|
||||
}
|
||||
|
||||
END_FUNC();
|
||||
|
@ -47,7 +47,7 @@ mca_ptl_elan_recv_frag_construct (mca_ptl_elan_recv_frag_t * frag)
|
||||
frag->frag_msg_cnt = 0;
|
||||
frag->frag_progressed = 0;
|
||||
|
||||
frag->frag.qdma = NULL;
|
||||
/*frag->frag.qdma = NULL;*/
|
||||
frag->alloc_buff = (char *) malloc (sizeof (char) * 2048 + 32);
|
||||
if (NULL == frag->alloc_buff) {
|
||||
ompi_output (0,
|
||||
@ -64,7 +64,7 @@ mca_ptl_elan_recv_frag_destruct (mca_ptl_elan_recv_frag_t * frag)
|
||||
frag->frag_msg_cnt = 0;
|
||||
frag->frag_progressed = 0;
|
||||
|
||||
frag->frag.qdma = NULL;
|
||||
/*frag->frag.qdma = NULL;*/
|
||||
free (frag->alloc_buff);
|
||||
frag->alloc_buff = NULL;
|
||||
frag->unex_buff = NULL;
|
||||
@ -139,7 +139,7 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr,
|
||||
}
|
||||
}
|
||||
desc = (mca_ptl_elan_send_frag_t *) item;
|
||||
desc->desc->req = (struct mca_ptl_elan_send_request_t *) sendreq;
|
||||
desc->desc->req = sendreq;
|
||||
|
||||
desc->desc->desc_type = desc_type;
|
||||
|
||||
@ -157,7 +157,7 @@ mca_ptl_elan_alloc_recv_desc (struct mca_pml_base_recv_request_t * req)
|
||||
void
|
||||
mca_ptl_elan_send_desc_done (
|
||||
mca_ptl_elan_send_frag_t *desc,
|
||||
mca_ptl_elan_send_request_t *req)
|
||||
mca_pml_base_send_request_t *req)
|
||||
{
|
||||
mca_ptl_elan_module_t *ptl;
|
||||
ompi_ptl_elan_queue_ctrl_t *queue;
|
||||
|
@ -72,7 +72,7 @@ typedef struct mca_ptl_elan_send_request_t mca_ptl_elan_send_request_t;
|
||||
void
|
||||
mca_ptl_elan_send_desc_done (
|
||||
mca_ptl_elan_send_frag_t *desc,
|
||||
mca_ptl_elan_send_request_t *req);
|
||||
mca_pml_base_send_request_t *req);
|
||||
|
||||
void
|
||||
mca_ptl_elan_recv_frag_done (
|
||||
|
@ -22,8 +22,8 @@
|
||||
struct mca_ptl_elan_peer_t {
|
||||
ompi_list_item_t super;
|
||||
|
||||
struct mca_ptl_elan_module_t* peer_ptl;
|
||||
struct mca_ptl_elan_proc_t* peer_proc;
|
||||
struct mca_ptl_elan_module_t *peer_ptl;
|
||||
struct mca_ptl_elan_proc_t *peer_proc;
|
||||
|
||||
int peer_vp;
|
||||
int peer_rails;
|
||||
|
@ -15,428 +15,6 @@
|
||||
#include "ptl_elan_frag.h"
|
||||
#include "ptl_elan_priv.h"
|
||||
|
||||
/* Initialize an ack descriptor and queue it to the command queue */
|
||||
int
|
||||
mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
||||
mca_ptl_elan_send_frag_t * desc,
|
||||
mca_ptl_elan_recv_frag_t * recv_frag)
|
||||
{
|
||||
struct ompi_ptl_elan_qdma_desc_t *qdma;
|
||||
mca_ptl_base_header_t *hdr;
|
||||
mca_pml_base_recv_request_t* request;
|
||||
|
||||
|
||||
START_FUNC();
|
||||
|
||||
desc->desc->desc_type = MCA_PTL_ELAN_DESC_QDMA;
|
||||
qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc;
|
||||
|
||||
hdr = (mca_ptl_base_header_t *) & desc->buff[0];
|
||||
|
||||
request = recv_frag->frag_recv.frag_request;
|
||||
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_size = sizeof(mca_ptl_base_ack_header_t);
|
||||
|
||||
hdr->hdr_ack.hdr_src_ptr =
|
||||
recv_frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_src_ptr;
|
||||
hdr->hdr_ack.hdr_dst_match.lval = 0;
|
||||
hdr->hdr_ack.hdr_dst_match.pval = request;
|
||||
hdr->hdr_ack.hdr_dst_addr.lval = 0;
|
||||
hdr->hdr_ack.hdr_dst_addr.pval = request->req_base.req_addr;
|
||||
hdr->hdr_ack.hdr_dst_size = request->req_bytes_packed;
|
||||
|
||||
hdr->hdr_frag.hdr_frag_length = sizeof(mca_ptl_base_ack_header_t);
|
||||
|
||||
/* Filling up QDMA descriptor */
|
||||
qdma->main_dma.dma_srcAddr = MAIN2ELAN (desc->rail->r_ctx,
|
||||
&desc->buff[0]);
|
||||
|
||||
/* XXX: Hardcoded DMA retry count */
|
||||
qdma->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE ((header_length +
|
||||
size_out),
|
||||
DMA_DataTypeByte,
|
||||
DMA_QueueWrite, 16);
|
||||
qdma->main_dma.dma_cookie = elan4_local_cookie (ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||
qdma->main_dma.dma_vproc = ((mca_ptl_elan_peer_t *)
|
||||
recv_frag->frag_recv.frag_base.frag_peer)->peer_vp;
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
|
||||
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma);
|
||||
|
||||
/*ptl->queue->tx_cmdq->cmdq_flush */
|
||||
elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq);
|
||||
|
||||
/* Insert desc into the list of outstanding DMA's */
|
||||
ompi_list_append (&ptl->queue->tx_desc, (ompi_list_item_t *) desc);
|
||||
|
||||
/* fragment state */
|
||||
desc->desc->req = NULL;
|
||||
desc->frag_base.frag_owner = ptl;
|
||||
desc->frag_base.frag_peer = recv_frag->frag_recv.frag_base.frag_peer;
|
||||
desc->frag_base.frag_addr = NULL;
|
||||
desc->frag_base.frag_size = 0;
|
||||
desc->frag_progressed = 0;
|
||||
desc->desc->desc_status = MCA_PTL_ELAN_DESC_LOCAL;
|
||||
|
||||
END_FUNC();
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
mca_ptl_elan_module_t * ptl,
|
||||
struct mca_ptl_elan_peer_t *ptl_peer,
|
||||
mca_pml_base_send_request_t *pml_req,
|
||||
size_t offset,
|
||||
size_t *size,
|
||||
int flags)
|
||||
{
|
||||
int header_length;
|
||||
int destvp;
|
||||
int size_out;
|
||||
int size_in;
|
||||
int rc = OMPI_SUCCESS;
|
||||
|
||||
mca_ptl_base_header_t *hdr;
|
||||
struct ompi_ptl_elan_qdma_desc_t * desc;
|
||||
|
||||
START_FUNC();
|
||||
|
||||
desc = (ompi_ptl_elan_qdma_desc_t *)frag->desc;
|
||||
destvp = ptl_peer->peer_vp;
|
||||
size_in = *size;
|
||||
|
||||
hdr = (mca_ptl_base_header_t *) & desc->buff[0];
|
||||
|
||||
if(offset == 0) {
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH;
|
||||
hdr->hdr_common.hdr_flags = flags;
|
||||
hdr->hdr_common.hdr_size = sizeof (mca_ptl_base_match_header_t);
|
||||
hdr->hdr_frag.hdr_frag_offset = offset;
|
||||
hdr->hdr_frag.hdr_frag_seq = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.lval = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.pval = desc;
|
||||
hdr->hdr_frag.hdr_dst_ptr.lval = 0;
|
||||
|
||||
hdr->hdr_match.hdr_contextid = pml_req->req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = pml_req->req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_dst = pml_req->req_base.req_peer;
|
||||
hdr->hdr_match.hdr_tag = pml_req->req_base.req_tag;
|
||||
hdr->hdr_match.hdr_msg_length = pml_req->req_bytes_packed;
|
||||
hdr->hdr_match.hdr_msg_seq = pml_req->req_base.req_sequence;
|
||||
header_length = sizeof (mca_ptl_base_match_header_t);
|
||||
} else {
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
|
||||
hdr->hdr_common.hdr_flags = flags;
|
||||
hdr->hdr_common.hdr_size = sizeof (mca_ptl_base_frag_header_t);
|
||||
hdr->hdr_frag.hdr_frag_offset = offset;
|
||||
hdr->hdr_frag.hdr_frag_seq = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.lval = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.pval = desc;
|
||||
hdr->hdr_frag.hdr_dst_ptr = pml_req->req_peer_match;
|
||||
header_length = sizeof (mca_ptl_base_frag_header_t);
|
||||
}
|
||||
|
||||
/* initialize convertor */
|
||||
if(size_in > 0) {
|
||||
struct iovec iov;
|
||||
ompi_convertor_t *convertor;
|
||||
|
||||
if( offset <= mca_ptl_elan_module.super.ptl_first_frag_size ) {
|
||||
convertor = &pml_req->req_convertor;
|
||||
} else {
|
||||
convertor = &frag->frag_base->frag_convertor;
|
||||
ompi_convertor_copy(&pml_req->req_convertor, convertor);
|
||||
ompi_convertor_init_for_send(
|
||||
convertor,
|
||||
0,
|
||||
pml_req->req_base.req_datatype,
|
||||
pml_req->req_base.req_count,
|
||||
pml_req->req_base.req_addr,
|
||||
offset);
|
||||
}
|
||||
|
||||
/* For now, eager sends are always packed into the descriptor
|
||||
* TODO: Inline up to 256 bytes (including the header), then
|
||||
* do a chained send for mesg < first_frag_size */
|
||||
iov.iov_base = &desc->buff[header_length];
|
||||
iov.iov_len = size_in;
|
||||
rc = ompi_convertor_pack(convertor, &iov, 1);
|
||||
if (rc < 0) {
|
||||
ompi_output (0, "[%s:%d] Unable to pack data\n",
|
||||
__FILE__, __LINE__);
|
||||
return;
|
||||
}
|
||||
size_out = iov.iov_len;
|
||||
} else {
|
||||
size_out = size_in;
|
||||
}
|
||||
|
||||
*size = size_out;
|
||||
hdr->hdr_frag.hdr_frag_length = size_out;
|
||||
|
||||
desc->main_dma.dma_srcAddr = MAIN2ELAN (desc->rail->r_ctx,
|
||||
&desc->buff[0]);
|
||||
|
||||
/* XXX: Hardcoded DMA retry count */
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE ((header_length +
|
||||
size_out),
|
||||
DMA_DataTypeByte,
|
||||
DMA_QueueWrite, 16);
|
||||
|
||||
desc->main_dma.dma_cookie =
|
||||
elan4_local_cookie (ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||
|
||||
if (CHECK_ELAN) {
|
||||
char hostname[32];
|
||||
|
||||
gethostname(hostname, 32);
|
||||
fprintf(stderr, "[%s send...] destvp %d type %d flag %d size %d\n",
|
||||
hostname, destvp, hdr->hdr_common.hdr_type,
|
||||
hdr->hdr_common.hdr_flags,
|
||||
hdr->hdr_common.hdr_size);
|
||||
}
|
||||
|
||||
desc->main_dma.dma_vproc = destvp;
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
END_FUNC();
|
||||
}
|
||||
|
||||
static void
|
||||
mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
mca_ptl_elan_module_t * ptl,
|
||||
struct mca_ptl_elan_peer_t *ptl_peer,
|
||||
mca_pml_base_send_request_t *pml_req,
|
||||
size_t offset,
|
||||
size_t *size,
|
||||
int flags)
|
||||
{
|
||||
int header_length;
|
||||
int destvp;
|
||||
int size_out;
|
||||
int size_in;
|
||||
int flags;
|
||||
int rc = OMPI_SUCCESS;
|
||||
|
||||
struct ompi_ptl_elan_putget_desc_t * desc;
|
||||
|
||||
mca_ptl_base_header_t *hdr;
|
||||
|
||||
START_FUNC();
|
||||
|
||||
desc = (ompi_ptl_elan_putget_desc_t *)frag->desc;
|
||||
destvp = ptl_peer->peer_vp;
|
||||
size_in = *size;
|
||||
hdr = &sendfrag->frag_base.frag_header;
|
||||
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
|
||||
hdr->hdr_common.hdr_flags = flags;
|
||||
hdr->hdr_common.hdr_size = sizeof(mca_ptl_base_frag_header_t);
|
||||
hdr->hdr_frag.hdr_frag_offset = offset;
|
||||
hdr->hdr_frag.hdr_frag_seq = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.lval = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.pval = frag;
|
||||
hdr->hdr_frag.hdr_dst_ptr = pml_req->req_peer_match;
|
||||
|
||||
desc->src_elan_addr = MAIN2ELAN (desc->rail->r_ctx,
|
||||
pml_req->req_base.req_addr);
|
||||
desc->dst_elan_addr = (E4_Addr)pml_req->req_peer_addr;
|
||||
|
||||
#define PUT_NON_CONTIGUOUS_DATA 0
|
||||
/* initialize convertor */
|
||||
if(size_in > 0 && PUT_NON_CONTIGUOUS_DATA) {
|
||||
struct iovec iov;
|
||||
ompi_convertor_t *convertor;
|
||||
|
||||
if( offset <= mca_ptl_elan_module.super.ptl_first_frag_size ) {
|
||||
convertor = &pml_req->req_convertor;
|
||||
} else {
|
||||
convertor = &frag->frag_base.frag_convertor;
|
||||
ompi_convertor_copy(&pml_req->req_convertor, convertor);
|
||||
ompi_convertor_init_for_send(
|
||||
convertor,
|
||||
0,
|
||||
pml_req->req_base.req_datatype,
|
||||
pml_req->req_base.req_count,
|
||||
pml_req->req_base.req_addr,
|
||||
offset);
|
||||
}
|
||||
|
||||
/* For now, eager sends are always packed into the descriptor
|
||||
* TODO: Inline up to 256 bytes (including the header), then
|
||||
* do a chained send for mesg < first_frag_size */
|
||||
|
||||
desc->src_elan_addr = elan4_main2elan(ptl->ptl_elan_ctx,
|
||||
desc->desc_buff);
|
||||
iov.iov_base = desc->desc_buff;
|
||||
iov.iov_len = size_in;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, 1);
|
||||
if (rc < 0) {
|
||||
ompi_output (0, "[%s:%d] Unable to pack data\n",
|
||||
__FILE__, __LINE__);
|
||||
return;
|
||||
}
|
||||
size_out = iov.iov_len;
|
||||
} else {
|
||||
size_out = size_in;
|
||||
}
|
||||
|
||||
*size = size_out;
|
||||
hdr->hdr_frag.hdr_frag_length = size_out;
|
||||
|
||||
|
||||
/* XXX: no additional flags for the DMA, remote, shmem, qwrite,
|
||||
* broadcast, etc */
|
||||
flags = 0;
|
||||
|
||||
#define MCA_PTL_ELAN_USE_CHAINED_DMA 0
|
||||
|
||||
#if defined(MCA_PTL_ELAN_USE_CHAINED_DMA)
|
||||
/* Setup a chained DMA
|
||||
* FIXME: remember
|
||||
*/
|
||||
/* Setup the chain dma */
|
||||
desc->chain_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||
sizeof(mca_ptl_base_frag_header_t),
|
||||
DMA_DataTypeByte, DMA_QueueWrite, 8);
|
||||
desc->chain_dma.dma_cookie = elan4_local_cookie(ptl->putget->pg_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||
desc->chain_dma.dma_vproc = destvp;
|
||||
desc->chain_dma.dma_srcAddr = elan4_main2elan (ctx, (void *) hdr);
|
||||
desc->chain_dma.dma_dstAddr = 0x0ULL;
|
||||
desc->chain_dma.dma_srcEvent = SDRAM2ELAN (ctx,
|
||||
&desc->elan_data_event->event32);
|
||||
/* causes the inputter to redirect the dma to the inputq */
|
||||
desc->chain_dma.dma_dstEvent = elan4_main2elan (ctx,
|
||||
(void *) ptl->queue->input);
|
||||
|
||||
INITEVENT_WORD (ctx, (EVENT *) & desc->elan_data_event->event32,
|
||||
&desc->main_doneWord);
|
||||
RESETEVENT_WORD (&desc->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx,
|
||||
(EVENT *) & desc->elan_data_event->event32, 1);
|
||||
|
||||
desc->chain_dma.dma_typeSize |= RUN_DMA_CMD;
|
||||
desc->chain_dma.dma_pad = NOP_CMD;
|
||||
|
||||
/* Copy down the chain dma to the chain buffer in elan sdram */
|
||||
memcpy ((void *)desc->chain_buf, (void *)&chain_dma, sizeof (E4_DMA64));
|
||||
desc->chain_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||
desc->chain_event->ev_Params[0] = elan4_main2elan (ctx,
|
||||
(void *)desc->chain_buf);
|
||||
/* XXX:
|
||||
* The chain dma will go directly into a command stream
|
||||
* so we need addend the command queue control bits.
|
||||
* Allocate space from command queues hanged off the CTX.
|
||||
*/
|
||||
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
#endif
|
||||
|
||||
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
||||
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
|
||||
|
||||
/* Chain an event */
|
||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, desc->chain_event);
|
||||
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
|
||||
|
||||
/* XXX: Hardcoded DMA retry count */
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (size_out,
|
||||
DMA_DataTypeByte, flags, putget->pg_retryCount);
|
||||
|
||||
/* Just a normal DMA, no need to have additional flags */
|
||||
desc->main_dma.dma_cookie = elan4_local_cookie (
|
||||
ptl->putget->pg_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA,
|
||||
destvp);
|
||||
desc->main_dma.dma_vproc = destvp;
|
||||
|
||||
if (CHECK_ELAN) {
|
||||
char hostname[32];
|
||||
|
||||
gethostname(hostname, 32);
|
||||
fprintf(stderr, "[%s send...] destvp %d type %d flag %d size %d\n",
|
||||
hostname, destvp, hdr->hdr_common.hdr_type,
|
||||
hdr->hdr_common.hdr_flags,
|
||||
hdr->hdr_common.hdr_size);
|
||||
}
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
END_FUNC();
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc,
|
||||
struct mca_ptl_elan_peer_t *ptl_peer,
|
||||
struct mca_pml_base_send_request_t *sendreq,
|
||||
size_t offset,
|
||||
size_t *size,
|
||||
int flags)
|
||||
{
|
||||
mca_ptl_elan_module_t *ptl;
|
||||
|
||||
ptl = &ptl_peer->peer_ptl;
|
||||
|
||||
START_FUNC();
|
||||
|
||||
if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_QDMA) {
|
||||
struct ompi_ptl_elan_qdma_desc_t *qdma;
|
||||
|
||||
qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc;
|
||||
|
||||
mca_ptl_elan_init_qdma_desc (qdma, ptl, ptl_peer, sendreq,
|
||||
offset, size, flags);
|
||||
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma);
|
||||
/*ptl->queue->tx_cmdq->cmdq_flush */
|
||||
elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq);
|
||||
|
||||
/* Insert desc into the list of outstanding DMA's */
|
||||
ompi_list_append (&ptl->queue->tx_desc, (ompi_list_item_t *) desc);
|
||||
|
||||
} else if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_PUTGET) {
|
||||
|
||||
struct ompi_ptl_elan_putget_desc_t *pdesc;
|
||||
|
||||
pdesc = (ompi_ptl_elan_putget_desc_t *)desc->desc;
|
||||
|
||||
/* For each put/get descriptor, a QDMA is chained off. */
|
||||
mca_ptl_elan_init_putget_desc (pdesc, ptl, ptl_peer, sendreq,
|
||||
offset, size, flags);
|
||||
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & pdesc->main_dma);
|
||||
/*ptl->queue->tx_cmdq->cmdq_flush */
|
||||
elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq);
|
||||
|
||||
/* Insert desc into the list of outstanding DMA's */
|
||||
ompi_list_append (&ptl->queue->put_desc, (ompi_list_item_t *) desc);
|
||||
} else {
|
||||
ompi_output (0, "Other types of DMA are not supported right now \n");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* fragment state */
|
||||
desc->frag_base.frag_owner = &ptl_peer->peer_ptl->super;
|
||||
desc->frag_base.frag_peer = ptl_peer;
|
||||
desc->frag_base.frag_addr = NULL;
|
||||
desc->frag_base.frag_size = *size;
|
||||
desc->frag_progressed = 0;
|
||||
desc->frag_ack_pending = 0; /* this is ack for internal elan */
|
||||
|
||||
END_FUNC();
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
mca_ptl_elan_data_frag (struct mca_ptl_elan_module_t *ptl,
|
||||
mca_ptl_base_header_t * header)
|
||||
@ -444,7 +22,7 @@ mca_ptl_elan_data_frag (struct mca_ptl_elan_module_t *ptl,
|
||||
/* Allocate a recv frag descriptor */
|
||||
mca_ptl_elan_recv_frag_t *recv_frag;
|
||||
ompi_list_item_t *item;
|
||||
mca_pml_base_recv_request_t *request;
|
||||
/*mca_pml_base_recv_request_t *request;*/
|
||||
|
||||
bool matched;
|
||||
int rc = OMPI_SUCCESS;
|
||||
@ -528,6 +106,485 @@ mca_ptl_elan_ctrl_frag (struct mca_ptl_elan_module_t *ptl,
|
||||
mca_ptl_elan_send_desc_done (desc, req);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
mca_ptl_elan_module_t * ptl,
|
||||
struct mca_ptl_elan_peer_t *ptl_peer,
|
||||
mca_pml_base_send_request_t *pml_req,
|
||||
size_t offset,
|
||||
size_t *size,
|
||||
int flags)
|
||||
{
|
||||
int header_length;
|
||||
int destvp;
|
||||
int size_out;
|
||||
int size_in;
|
||||
int rc = OMPI_SUCCESS;
|
||||
|
||||
mca_ptl_base_header_t *hdr;
|
||||
struct ompi_ptl_elan_qdma_desc_t * desc;
|
||||
|
||||
START_FUNC();
|
||||
|
||||
desc = (ompi_ptl_elan_qdma_desc_t *)frag->desc;
|
||||
destvp = ptl_peer->peer_vp;
|
||||
size_in = *size;
|
||||
|
||||
hdr = (mca_ptl_base_header_t *) & desc->buff[0];
|
||||
|
||||
if(offset == 0) {
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH;
|
||||
hdr->hdr_common.hdr_flags = flags;
|
||||
hdr->hdr_common.hdr_size = sizeof (mca_ptl_base_match_header_t);
|
||||
hdr->hdr_frag.hdr_frag_offset = offset;
|
||||
hdr->hdr_frag.hdr_frag_seq = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.lval = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.pval = desc;
|
||||
hdr->hdr_frag.hdr_dst_ptr.lval = 0;
|
||||
|
||||
hdr->hdr_match.hdr_contextid = pml_req->req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = pml_req->req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_dst = pml_req->req_base.req_peer;
|
||||
hdr->hdr_match.hdr_tag = pml_req->req_base.req_tag;
|
||||
hdr->hdr_match.hdr_msg_length = pml_req->req_bytes_packed;
|
||||
hdr->hdr_match.hdr_msg_seq = pml_req->req_base.req_sequence;
|
||||
header_length = sizeof (mca_ptl_base_match_header_t);
|
||||
} else {
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
|
||||
hdr->hdr_common.hdr_flags = flags;
|
||||
hdr->hdr_common.hdr_size = sizeof (mca_ptl_base_frag_header_t);
|
||||
hdr->hdr_frag.hdr_frag_offset = offset;
|
||||
hdr->hdr_frag.hdr_frag_seq = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.lval = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.pval = desc;
|
||||
hdr->hdr_frag.hdr_dst_ptr = pml_req->req_peer_match;
|
||||
header_length = sizeof (mca_ptl_base_frag_header_t);
|
||||
}
|
||||
|
||||
/* initialize convertor */
|
||||
if(size_in > 0) {
|
||||
struct iovec iov;
|
||||
ompi_convertor_t *convertor;
|
||||
|
||||
if( offset <= mca_ptl_elan_module.super.ptl_first_frag_size ) {
|
||||
convertor = &pml_req->req_convertor;
|
||||
} else {
|
||||
convertor = &frag->frag_base.frag_convertor;
|
||||
ompi_convertor_copy(&pml_req->req_convertor, convertor);
|
||||
ompi_convertor_init_for_send(
|
||||
convertor,
|
||||
0,
|
||||
pml_req->req_base.req_datatype,
|
||||
pml_req->req_base.req_count,
|
||||
pml_req->req_base.req_addr,
|
||||
offset);
|
||||
}
|
||||
|
||||
/* For now, eager sends are always packed into the descriptor
|
||||
* TODO: Inline up to 256 bytes (including the header), then
|
||||
* do a chained send for mesg < first_frag_size */
|
||||
iov.iov_base = &desc->buff[header_length];
|
||||
iov.iov_len = size_in;
|
||||
rc = ompi_convertor_pack(convertor, &iov, 1);
|
||||
if (rc < 0) {
|
||||
ompi_output (0, "[%s:%d] Unable to pack data\n",
|
||||
__FILE__, __LINE__);
|
||||
return;
|
||||
}
|
||||
size_out = iov.iov_len;
|
||||
} else {
|
||||
size_out = size_in;
|
||||
}
|
||||
|
||||
*size = size_out;
|
||||
hdr->hdr_frag.hdr_frag_length = size_out;
|
||||
|
||||
desc->main_dma.dma_srcAddr = MAIN2ELAN (desc->ptl->ptl_elan_ctx,
|
||||
&desc->buff[0]);
|
||||
|
||||
/* XXX: Hardcoded DMA retry count */
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE ((header_length +
|
||||
size_out),
|
||||
DMA_DataTypeByte,
|
||||
DMA_QueueWrite, 16);
|
||||
|
||||
desc->main_dma.dma_cookie =
|
||||
elan4_local_cookie (ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||
|
||||
if (CHECK_ELAN) {
|
||||
char hostname[32];
|
||||
|
||||
gethostname(hostname, 32);
|
||||
fprintf(stderr, "[%s send...] destvp %d type %d flag %d size %d\n",
|
||||
hostname, destvp, hdr->hdr_common.hdr_type,
|
||||
hdr->hdr_common.hdr_flags,
|
||||
hdr->hdr_common.hdr_size);
|
||||
}
|
||||
|
||||
desc->main_dma.dma_vproc = destvp;
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
END_FUNC();
|
||||
}
|
||||
|
||||
static void
|
||||
mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
mca_ptl_elan_module_t * ptl,
|
||||
struct mca_ptl_elan_peer_t *ptl_peer,
|
||||
mca_pml_base_send_request_t *pml_req,
|
||||
size_t offset,
|
||||
size_t *size,
|
||||
int flags)
|
||||
{
|
||||
int destvp;
|
||||
int size_out;
|
||||
int size_in;
|
||||
int rc = OMPI_SUCCESS;
|
||||
ELAN4_CTX *ctx;
|
||||
|
||||
struct ompi_ptl_elan_putget_desc_t * desc;
|
||||
|
||||
mca_ptl_base_header_t *hdr;
|
||||
|
||||
START_FUNC();
|
||||
|
||||
hdr = &frag->frag_base.frag_header;
|
||||
desc = (ompi_ptl_elan_putget_desc_t *)frag->desc;
|
||||
destvp = ptl_peer->peer_vp;
|
||||
size_in = *size;
|
||||
ctx = ptl->ptl_elan_ctx;
|
||||
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
|
||||
hdr->hdr_common.hdr_flags = flags;
|
||||
hdr->hdr_common.hdr_size = sizeof(mca_ptl_base_frag_header_t);
|
||||
hdr->hdr_frag.hdr_frag_offset = offset;
|
||||
hdr->hdr_frag.hdr_frag_seq = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.lval = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.pval = frag;
|
||||
hdr->hdr_frag.hdr_dst_ptr = pml_req->req_peer_match;
|
||||
|
||||
desc->src_elan_addr = MAIN2ELAN (ctx, pml_req->req_base.req_addr);
|
||||
desc->dst_elan_addr = (E4_Addr)pml_req->req_peer_addr.lval;
|
||||
|
||||
#define PUT_NON_CONTIGUOUS_DATA 0
|
||||
/* initialize convertor */
|
||||
if(size_in > 0 && PUT_NON_CONTIGUOUS_DATA) {
|
||||
struct iovec iov;
|
||||
ompi_convertor_t *convertor;
|
||||
|
||||
if( offset <= mca_ptl_elan_module.super.ptl_first_frag_size ) {
|
||||
convertor = &pml_req->req_convertor;
|
||||
} else {
|
||||
convertor = &frag->frag_base.frag_convertor;
|
||||
ompi_convertor_copy(&pml_req->req_convertor, convertor);
|
||||
ompi_convertor_init_for_send(
|
||||
convertor,
|
||||
0,
|
||||
pml_req->req_base.req_datatype,
|
||||
pml_req->req_base.req_count,
|
||||
pml_req->req_base.req_addr,
|
||||
offset);
|
||||
}
|
||||
|
||||
/* For now, eager sends are always packed into the descriptor
|
||||
* TODO: Inline up to 256 bytes (including the header), then
|
||||
* do a chained send for mesg < first_frag_size */
|
||||
|
||||
desc->src_elan_addr = elan4_main2elan(ctx, desc->desc_buff);
|
||||
iov.iov_base = desc->desc_buff;
|
||||
iov.iov_len = size_in;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, 1);
|
||||
if (rc < 0) {
|
||||
ompi_output (0, "[%s:%d] Unable to pack data\n",
|
||||
__FILE__, __LINE__);
|
||||
return;
|
||||
}
|
||||
size_out = iov.iov_len;
|
||||
} else {
|
||||
size_out = size_in;
|
||||
}
|
||||
|
||||
*size = size_out;
|
||||
hdr->hdr_frag.hdr_frag_length = size_out;
|
||||
|
||||
|
||||
/* XXX: no additional flags for the DMA, remote, shmem, qwrite,
|
||||
* broadcast, etc */
|
||||
flags = 0;
|
||||
|
||||
#define MCA_PTL_ELAN_USE_CHAINED_DMA 0
|
||||
|
||||
#if defined(MCA_PTL_ELAN_USE_CHAINED_DMA)
|
||||
/* Setup a chained DMA
|
||||
* FIXME: remember
|
||||
*/
|
||||
/* Setup the chain dma */
|
||||
desc->chain_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||
sizeof(mca_ptl_base_frag_header_t),
|
||||
DMA_DataTypeByte, DMA_QueueWrite, 8);
|
||||
desc->chain_dma.dma_cookie = elan4_local_cookie(ptl->putget->pg_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||
desc->chain_dma.dma_vproc = destvp;
|
||||
desc->chain_dma.dma_srcAddr =
|
||||
elan4_main2elan (ctx, (void *) hdr);
|
||||
desc->chain_dma.dma_dstAddr = 0x0ULL;
|
||||
desc->chain_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event);
|
||||
/* causes the inputter to redirect the dma to the inputq */
|
||||
desc->chain_dma.dma_dstEvent = elan4_main2elan (ctx,
|
||||
(void *) ptl->queue->input);
|
||||
|
||||
INITEVENT_WORD (ctx, (E4_Event *) desc->elan_event, &desc->main_doneWord);
|
||||
RESETEVENT_WORD (&desc->main_doneWord);
|
||||
|
||||
/* Be sure that padding E4_Event is not causing problems */
|
||||
PRIMEEVENT_WORD (ctx, (E4_Event *)desc->elan_event, 1);
|
||||
|
||||
desc->chain_dma.dma_typeSize |= RUN_DMA_CMD;
|
||||
desc->chain_dma.dma_pad = NOP_CMD;
|
||||
|
||||
/* Copy down the chain dma to the chain buffer in elan sdram */
|
||||
memcpy ((void *)desc->chain_buff, (void *)&desc->chain_dma,
|
||||
sizeof (E4_DMA64));
|
||||
desc->chain_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||
desc->chain_event->ev_Params[0] = elan4_main2elan (ctx,
|
||||
(void *)desc->chain_buff);
|
||||
/* XXX:
|
||||
* The chain dma will go directly into a command stream
|
||||
* so we need addend the command queue control bits.
|
||||
* Allocate space from command queues hanged off the CTX.
|
||||
*/
|
||||
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
#endif
|
||||
|
||||
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
||||
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
|
||||
|
||||
/* Chain an event */
|
||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx,
|
||||
(E4_Event *)desc->chain_event);
|
||||
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
|
||||
|
||||
/* XXX: Hardcoded DMA retry count */
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (size_out,
|
||||
DMA_DataTypeByte, flags, ptl->putget->pg_retryCount);
|
||||
|
||||
/* Just a normal DMA, no need to have additional flags */
|
||||
desc->main_dma.dma_cookie = elan4_local_cookie (
|
||||
ptl->putget->pg_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA,
|
||||
destvp);
|
||||
desc->main_dma.dma_vproc = destvp;
|
||||
|
||||
if (CHECK_ELAN) {
|
||||
char hostname[32];
|
||||
|
||||
gethostname(hostname, 32);
|
||||
fprintf(stderr, "[%s send...] destvp %d type %d flag %d size %d\n",
|
||||
hostname, destvp, hdr->hdr_common.hdr_type,
|
||||
hdr->hdr_common.hdr_flags,
|
||||
hdr->hdr_common.hdr_size);
|
||||
}
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
END_FUNC();
|
||||
}
|
||||
|
||||
int
|
||||
mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc,
|
||||
struct mca_ptl_elan_peer_t *ptl_peer,
|
||||
struct mca_pml_base_send_request_t *sendreq,
|
||||
size_t offset,
|
||||
size_t *size,
|
||||
int flags)
|
||||
{
|
||||
mca_ptl_elan_module_t *ptl;
|
||||
|
||||
ptl = ptl_peer->peer_ptl;
|
||||
|
||||
START_FUNC();
|
||||
|
||||
if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_QDMA) {
|
||||
struct ompi_ptl_elan_qdma_desc_t *qdma;
|
||||
|
||||
qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc;
|
||||
|
||||
mca_ptl_elan_init_qdma_desc (desc, ptl, ptl_peer, sendreq,
|
||||
offset, size, flags);
|
||||
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma);
|
||||
/*ptl->queue->tx_cmdq->cmdq_flush */
|
||||
elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq);
|
||||
|
||||
/* Insert desc into the list of outstanding DMA's */
|
||||
ompi_list_append (&ptl->queue->tx_desc, (ompi_list_item_t *) desc);
|
||||
|
||||
} else if (MCA_PTL_ELAN_DESC_PUT == desc->desc->desc_type) {
|
||||
|
||||
struct ompi_ptl_elan_putget_desc_t *pdesc;
|
||||
|
||||
pdesc = (ompi_ptl_elan_putget_desc_t *)desc->desc;
|
||||
|
||||
/* For each put/get descriptor, a QDMA is chained off. */
|
||||
mca_ptl_elan_init_putget_desc (desc, ptl, ptl_peer, sendreq,
|
||||
offset, size, flags);
|
||||
elan4_run_dma_cmd (ptl->putget->put_cmdq, (E4_DMA *) &pdesc->main_dma);
|
||||
|
||||
/*ptl->queue->tx_cmdq->cmdq_flush */
|
||||
elan4_flush_cmdq_reorder (ptl->putget->put_cmdq);
|
||||
|
||||
/* Insert desc into the list of outstanding DMA's */
|
||||
ompi_list_append (&ptl->putget->put_desc, (ompi_list_item_t *) desc);
|
||||
} else {
|
||||
ompi_output (0, "To support GET and Other types of DMA "
|
||||
"are not supported right now \n");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* fragment state */
|
||||
desc->frag_base.frag_owner = (struct mca_ptl_base_module_t *)
|
||||
&ptl_peer->peer_ptl->super;
|
||||
desc->frag_base.frag_peer = (struct mca_ptl_base_peer_t *) ptl_peer;
|
||||
desc->frag_base.frag_addr = NULL;
|
||||
desc->frag_base.frag_size = *size;
|
||||
desc->frag_progressed = 0;
|
||||
desc->frag_ack_pending = 0; /* this is ack for internal elan */
|
||||
|
||||
END_FUNC();
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Initialize an ack descriptor and queue it to the command queue */
|
||||
int
|
||||
mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
||||
mca_ptl_elan_send_frag_t * desc,
|
||||
mca_ptl_elan_recv_frag_t * recv_frag)
|
||||
{
|
||||
struct ompi_ptl_elan_qdma_desc_t *qdma;
|
||||
mca_ptl_base_header_t *hdr;
|
||||
mca_pml_base_recv_request_t* request;
|
||||
mca_ptl_elan_module_t *elan_ptl;
|
||||
|
||||
int destvp;
|
||||
|
||||
START_FUNC();
|
||||
|
||||
destvp = ((mca_ptl_elan_peer_t *)
|
||||
recv_frag->frag_recv.frag_base.frag_peer)->peer_vp;
|
||||
|
||||
elan_ptl = (mca_ptl_elan_module_t *) ptl;
|
||||
desc->desc->desc_type = MCA_PTL_ELAN_DESC_QDMA;
|
||||
qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc;
|
||||
|
||||
hdr = (mca_ptl_base_header_t *) & qdma->buff[0];
|
||||
|
||||
request = recv_frag->frag_recv.frag_request;
|
||||
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_size = sizeof(mca_ptl_base_ack_header_t);
|
||||
|
||||
hdr->hdr_ack.hdr_src_ptr =
|
||||
recv_frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_src_ptr;
|
||||
hdr->hdr_ack.hdr_dst_match.lval = 0;
|
||||
hdr->hdr_ack.hdr_dst_match.pval = request;
|
||||
hdr->hdr_ack.hdr_dst_addr.lval = 0;
|
||||
hdr->hdr_ack.hdr_dst_addr.pval = request->req_base.req_addr;
|
||||
hdr->hdr_ack.hdr_dst_size = request->req_bytes_packed;
|
||||
hdr->hdr_frag.hdr_frag_length = sizeof(mca_ptl_base_ack_header_t);
|
||||
|
||||
/* Filling up QDMA descriptor */
|
||||
qdma->main_dma.dma_srcAddr = elan4_main2elan(
|
||||
elan_ptl->ptl_elan_ctx, &qdma->buff[0]);
|
||||
|
||||
/* XXX: Hardcoded DMA retry count */
|
||||
qdma->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||
sizeof(mca_ptl_base_ack_header_t),
|
||||
DMA_DataTypeByte, DMA_QueueWrite, 16);
|
||||
qdma->main_dma.dma_vproc = destvp;
|
||||
qdma->main_dma.dma_cookie = elan4_local_cookie (
|
||||
elan_ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
|
||||
elan4_run_dma_cmd (elan_ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma);
|
||||
|
||||
/*ptl->queue->tx_cmdq->cmdq_flush */
|
||||
elan4_flush_cmdq_reorder (elan_ptl->queue->tx_cmdq);
|
||||
|
||||
/* Insert desc into the list of outstanding DMA's */
|
||||
ompi_list_append (&elan_ptl->queue->tx_desc, (ompi_list_item_t *) desc);
|
||||
|
||||
/* fragment state */
|
||||
desc->desc->req = NULL;
|
||||
desc->frag_base.frag_owner = ptl;
|
||||
desc->frag_base.frag_peer = recv_frag->frag_recv.frag_base.frag_peer;
|
||||
desc->frag_base.frag_addr = NULL;
|
||||
desc->frag_base.frag_size = 0;
|
||||
desc->frag_progressed = 0;
|
||||
desc->desc->desc_status = MCA_PTL_ELAN_DESC_LOCAL;
|
||||
|
||||
END_FUNC();
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled variant of mca_ptl_elan_start_ack: everything between this
 * "#if 0" and the matching "#endif" is removed by the preprocessor.
 * It takes a send-style argument list (peer, send request, offset, size,
 * flags) and only handles QDMA descriptors.
 */
int
mca_ptl_elan_start_ack (mca_ptl_elan_send_frag_t * desc,
                        struct mca_ptl_elan_peer_t *ptl_peer,
                        struct mca_pml_base_send_request_t *sendreq,
                        size_t offset,
                        size_t *size,
                        int flags)
{
    mca_ptl_elan_module_t *ptl;

    START_FUNC();

    /* Only queue DMAs are handled by this variant */
    if (MCA_PTL_ELAN_DESC_QDMA != desc->desc->desc_type) {
        ompi_output (0,
                     "Other types of DMA are not supported right now \n");
        return OMPI_ERROR;
    } else {
        struct ompi_ptl_elan_qdma_desc_t *qdesc =
            (ompi_ptl_elan_qdma_desc_t *) desc->desc;

        /* The owning module is taken from the descriptor itself */
        ptl = qdesc->ptl;

        mca_ptl_elan_init_qdma_desc (qdesc, ptl, ptl_peer, sendreq,
                                     offset, size, flags);

        /* Issue the DMA, then flush the command queue */
        elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) &qdesc->main_dma);
        elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq);

        /* Track the fragment among the outstanding queue DMAs */
        ompi_list_append (&ptl->queue->tx_desc, (ompi_list_item_t *) desc);
    }

    /* Fragment bookkeeping */
    desc->frag_base.frag_owner = &ptl_peer->peer_ptl->super;
    desc->frag_base.frag_peer  = ptl_peer;
    desc->frag_base.frag_addr  = NULL;
    desc->frag_base.frag_size  = *size;
    desc->frag_progressed      = 0;
    desc->frag_ack_pending     = 0; /* this is ack for internal elan */

    END_FUNC();
    return OMPI_SUCCESS;
}
#endif
|
||||
|
||||
int
|
||||
mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp)
|
||||
{
|
||||
@ -670,12 +727,15 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
|
||||
if (rc) {
|
||||
mca_ptl_base_header_t *header;
|
||||
mca_ptl_elan_send_request_t *req;
|
||||
struct ompi_ptl_elan_qdma_desc_t *qdma;
|
||||
|
||||
/* Remove the desc, update the request, put back to free list */
|
||||
desc = (mca_ptl_elan_send_frag_t *)
|
||||
ompi_list_remove_first (&queue->tx_desc);
|
||||
req = desc->desc->req;
|
||||
header = (mca_ptl_base_header_t *)&
|
||||
((ompi_ptl_elan_qdma_desc_t *)desc->desc)->buff[0];
|
||||
qdma = (ompi_ptl_elan_qdma_desc_t*)desc->desc;
|
||||
|
||||
req = (mca_ptl_elan_send_request_t *)qdma->req;
|
||||
header = (mca_ptl_base_header_t *)&qdma->buff[0];
|
||||
|
||||
if (CHECK_ELAN) {
|
||||
char hostname[32];
|
||||
@ -691,12 +751,9 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
|
||||
mca_ptl_elan_send_desc_done (desc, req);
|
||||
|
||||
/* Remember to reset the events */
|
||||
INITEVENT_WORD (ctx,
|
||||
(EVENT *) & desc->elan_data_event->event32,
|
||||
&desc->main_doneWord);
|
||||
RESETEVENT_WORD (&desc->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx,
|
||||
(EVENT *) & desc->elan_data_event->event32, 1);
|
||||
INITEVENT_WORD (ctx, qdma->elan_event, &qdma->main_doneWord);
|
||||
RESETEVENT_WORD (&qdma->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx, qdma->elan_event, 1);
|
||||
|
||||
} else {
|
||||
/* XXX: Stop at any incomplete send desc */
|
||||
|
@ -121,7 +121,7 @@ typedef struct ompi_ptl_elan_recv_queue_t ompi_ptl_elan_recv_queue_t;
|
||||
/* 8 byte aligned */ \
|
||||
volatile E4_uint64 main_doneWord; \
|
||||
/* 8 byte aligned */ \
|
||||
E4_Event32 *elan_event; \
|
||||
E4_Event *elan_event; \
|
||||
uint8_t *desc_buff; \
|
||||
/* 8 byte aligned */ \
|
||||
mca_pml_base_send_request_t *req; \
|
||||
@ -196,6 +196,7 @@ struct ompi_ptl_elan_putget_ctrl_t {
|
||||
int pg_evictCache;
|
||||
int32_t pg_waitType;
|
||||
ELAN_FLAGS pg_flags;
|
||||
ompi_mutex_t pg_lock;
|
||||
|
||||
E4_CmdQ *put_cmdq;
|
||||
E4_CmdQ *get_cmdq;
|
||||
@ -285,6 +286,10 @@ int mca_ptl_elan_start_desc(mca_ptl_elan_send_frag_t *desc,
|
||||
size_t *size,
|
||||
int flags);
|
||||
|
||||
int mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
||||
mca_ptl_elan_send_frag_t * desc,
|
||||
mca_ptl_elan_recv_frag_t * recv_frag);
|
||||
|
||||
int mca_ptl_elan_poll_desc(mca_ptl_elan_send_frag_t *desc);
|
||||
int mca_ptl_elan_wait_desc(mca_ptl_elan_send_frag_t *desc);
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user