Get chained DMA done; still to do: test it and add threading support.
This commit was SVN r1913.
Этот коммит содержится в:
родитель
a9e22a500d
Коммит
f4ac830175
@ -245,7 +245,8 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl,
|
||||
if (offset == 0) { /* The first fragment uses a cached desc */
|
||||
desc = ((mca_ptl_elan_send_request_t*)sendreq)->req_frag;
|
||||
} else {
|
||||
desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, 0);
|
||||
desc = mca_ptl_elan_alloc_send_desc(ptl,
|
||||
sendreq, MCA_PTL_ELAN_DESC_QDMA);
|
||||
if (NULL == desc) {
|
||||
ompi_output(0,
|
||||
"[%s:%d] Unable to allocate an elan send descriptors \n",
|
||||
@ -288,7 +289,7 @@ mca_ptl_elan_put (struct mca_ptl_base_module_t *ptl,
|
||||
|
||||
START_FUNC();
|
||||
|
||||
desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, 1);
|
||||
desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, MCA_PTL_ELAN_DESC_PUT);
|
||||
if (NULL == desc) {
|
||||
ompi_output(0,
|
||||
"[%s:%d] Unable to allocate an elan send descriptors \n",
|
||||
@ -317,7 +318,30 @@ mca_ptl_elan_get (struct mca_ptl_base_module_t *ptl,
|
||||
size_t size,
|
||||
int flags)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
int rc = OMPI_SUCCESS;
|
||||
mca_ptl_elan_send_frag_t *desc;
|
||||
|
||||
/* XXX:
|
||||
* Since the address passed down from PML does not provide
|
||||
* elan information, there needs to be a change
|
||||
*/
|
||||
|
||||
START_FUNC();
|
||||
|
||||
desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, MCA_PTL_ELAN_DESC_GET);
|
||||
if (NULL == desc) {
|
||||
ompi_output(0,
|
||||
"[%s:%d] Unable to allocate an elan send descriptors \n",
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
|
||||
rc = mca_ptl_elan_start_desc(desc,
|
||||
(struct mca_ptl_elan_peer_t *)ptl_peer,
|
||||
sendreq, offset, &size, flags);
|
||||
|
||||
/* Update all the sends until the put is done */
|
||||
END_FUNC();
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -97,12 +97,13 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
||||
{
|
||||
desc->main_dma.dma_dstAddr = 0;
|
||||
desc->main_dma.dma_srcEvent =
|
||||
SDRAM2ELAN (ctx, &elan_ptr->event32);
|
||||
SDRAM2ELAN (ctx, &desc->elan_data_event->event32);
|
||||
desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input);
|
||||
INITEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32,
|
||||
INITEVENT_WORD (ctx, (EVENT *) & desc->elan_data_event->event32,
|
||||
&desc->main_doneWord);
|
||||
RESETEVENT_WORD (&desc->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32, 1);
|
||||
PRIMEEVENT_WORD (ctx,
|
||||
(EVENT *) & desc->elan_data_event->event32, 1);
|
||||
}
|
||||
|
||||
item = (ompi_list_item_t *) frag;
|
||||
|
@ -83,7 +83,7 @@ extern mca_ptl_elan_state_t mca_ptl_elan_global_state;
|
||||
mca_ptl_elan_send_frag_t *
|
||||
mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr,
|
||||
struct mca_pml_base_send_request_t *sendreq,
|
||||
int oneside)
|
||||
int desc_type)
|
||||
{
|
||||
|
||||
ompi_free_list_t *flist;
|
||||
@ -93,11 +93,18 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr,
|
||||
START_FUNC();
|
||||
|
||||
/* For now, bind to queue DMA directly */
|
||||
if (oneside) {
|
||||
/*struct mca_ptl_elan_peer_t *peer;*/
|
||||
flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->putget)->tx_desc_free;
|
||||
} else {
|
||||
if (MCA_PTL_ELAN_DESC_QDMA) {
|
||||
flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->queue)->tx_desc_free;
|
||||
} else if (MCA_PTL_ELAN_DESC_PUT) {
|
||||
flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->putget)->put_desc_free;
|
||||
} else if (MCA_PTL_ELAN_DESC_GET) {
|
||||
/*struct mca_ptl_elan_peer_t *peer;*/
|
||||
flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->putget)->get_desc_free;
|
||||
} else {
|
||||
ompi_output (0,
|
||||
"[%s:%d] Error: unknown to descriptor desc type\n",
|
||||
__FILE__, __LINE__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (ompi_using_threads ()) {
|
||||
@ -134,11 +141,7 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr,
|
||||
desc = (mca_ptl_elan_send_frag_t *) item;
|
||||
desc->desc->req = (struct mca_ptl_elan_send_request_t *) sendreq;
|
||||
|
||||
if (oneside) {
|
||||
desc->desc->desc_type = MCA_PTL_ELAN_DESC_PUTGET;
|
||||
} else {
|
||||
desc->desc->desc_type = MCA_PTL_ELAN_DESC_QDMA;
|
||||
}
|
||||
desc->desc->desc_type = desc_type;
|
||||
|
||||
END_FUNC();
|
||||
return desc;
|
||||
|
@ -228,18 +228,31 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
|
||||
struct ompi_ptl_elan_putget_desc_t * desc;
|
||||
|
||||
mca_ptl_base_header_t *hdr;
|
||||
|
||||
START_FUNC();
|
||||
|
||||
desc = (ompi_ptl_elan_putget_desc_t *)frag->desc;
|
||||
destvp = ptl_peer->peer_vp;
|
||||
size_in = *size;
|
||||
hdr = &sendfrag->frag_base.frag_header;
|
||||
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
|
||||
hdr->hdr_common.hdr_flags = flags;
|
||||
hdr->hdr_common.hdr_size = sizeof(mca_ptl_base_frag_header_t);
|
||||
hdr->hdr_frag.hdr_frag_offset = offset;
|
||||
hdr->hdr_frag.hdr_frag_seq = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.lval = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.pval = frag;
|
||||
hdr->hdr_frag.hdr_dst_ptr = pml_req->req_peer_match;
|
||||
|
||||
desc->src_elan_addr = MAIN2ELAN (desc->rail->r_ctx,
|
||||
pml_req->req_base.req_addr);
|
||||
desc->dst_elan_addr = (E4_Addr)pml_req->req_peer_addr;
|
||||
|
||||
#define PUT_NON_CONTIGUOUS_DATA 0
|
||||
/* initialize convertor */
|
||||
if(size_in > 0) {
|
||||
if(size_in > 0 && PUT_NON_CONTIGUOUS_DATA) {
|
||||
struct iovec iov;
|
||||
ompi_convertor_t *convertor;
|
||||
|
||||
@ -260,8 +273,12 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
/* For now, eager sends are always packed into the descriptor
|
||||
* TODO: Inline up to 256 bytes (including the header), then
|
||||
* do a chained send for mesg < first_frag_size */
|
||||
iov.iov_base = &desc->buff[header_length];
|
||||
|
||||
desc->src_elan_addr = elan4_main2elan(ptl->ptl_elan_ctx,
|
||||
desc->desc_buff);
|
||||
iov.iov_base = desc->desc_buff;
|
||||
iov.iov_len = size_in;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, 1);
|
||||
if (rc < 0) {
|
||||
ompi_output (0, "[%s:%d] Unable to pack data\n",
|
||||
@ -274,18 +291,67 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
}
|
||||
|
||||
*size = size_out;
|
||||
hdr->hdr_frag.hdr_frag_length = size_out;
|
||||
|
||||
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
||||
desc->main_dma.dma_srcAddr = desc->dst_elan_addr;
|
||||
|
||||
/* XXX: no additional flags for the DMA, remote, shmem, qwrite,
|
||||
* broadcast, etc */
|
||||
flags = 0;
|
||||
|
||||
#define MCA_PTL_ELAN_USE_CHAINED_DMA 0
|
||||
|
||||
#if defined(MCA_PTL_ELAN_USE_CHAINED_DMA)
|
||||
/* Setup a chained DMA
|
||||
* FIXME: remember
|
||||
*/
|
||||
/* Setup the chain dma */
|
||||
desc->chain_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||
sizeof(mca_ptl_base_frag_header_t),
|
||||
DMA_DataTypeByte, DMA_QueueWrite, 8);
|
||||
desc->chain_dma.dma_cookie = elan4_local_cookie(ptl->putget->pg_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||
desc->chain_dma.dma_vproc = destvp;
|
||||
desc->chain_dma.dma_srcAddr = elan4_main2elan (ctx, (void *) hdr);
|
||||
desc->chain_dma.dma_dstAddr = 0x0ULL;
|
||||
desc->chain_dma.dma_srcEvent = SDRAM2ELAN (ctx,
|
||||
&desc->elan_data_event->event32);
|
||||
/* causes the inputter to redirect the dma to the inputq */
|
||||
desc->chain_dma.dma_dstEvent = elan4_main2elan (ctx,
|
||||
(void *) ptl->queue->input);
|
||||
|
||||
INITEVENT_WORD (ctx, (EVENT *) & desc->elan_data_event->event32,
|
||||
&desc->main_doneWord);
|
||||
RESETEVENT_WORD (&desc->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx,
|
||||
(EVENT *) & desc->elan_data_event->event32, 1);
|
||||
|
||||
desc->chain_dma.dma_typeSize |= RUN_DMA_CMD;
|
||||
desc->chain_dma.dma_pad = NOP_CMD;
|
||||
|
||||
/* Copy down the chain dma to the chain buffer in elan sdram */
|
||||
memcpy ((void *)desc->chain_buf, (void *)&chain_dma, sizeof (E4_DMA64));
|
||||
desc->chain_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||
desc->chain_event->ev_Params[0] = elan4_main2elan (ctx,
|
||||
(void *)desc->chain_buf);
|
||||
/* XXX:
|
||||
* The chain dma will go directly into a command stream
|
||||
* so we need to append the command queue control bits.
|
||||
* Allocate space from command queues hanging off the CTX.
|
||||
*/
|
||||
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
#endif
|
||||
|
||||
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
||||
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
|
||||
|
||||
/* Chain an event */
|
||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, desc->chain_event);
|
||||
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
|
||||
|
||||
/* XXX: Hardcoded DMA retry count */
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||
(header_length + size_out), DMA_DataTypeByte, flags,
|
||||
putget->pg_retryCount);
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (size_out,
|
||||
DMA_DataTypeByte, flags, putget->pg_retryCount);
|
||||
|
||||
/* Just a normal DMA, no need to have additional flags */
|
||||
desc->main_dma.dma_cookie = elan4_local_cookie (
|
||||
@ -304,10 +370,8 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
hdr->hdr_common.hdr_size);
|
||||
}
|
||||
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
/*elan4_run_dma_cmd(cmdq, (E4_DMA *)&pd->pd_dma);*/
|
||||
END_FUNC();
|
||||
}
|
||||
|
||||
@ -345,6 +409,8 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc,
|
||||
struct ompi_ptl_elan_putget_desc_t *pdesc;
|
||||
|
||||
pdesc = (ompi_ptl_elan_putget_desc_t *)desc->desc;
|
||||
|
||||
/* For each put/get descriptor, a QDMA is chained off. */
|
||||
mca_ptl_elan_init_putget_desc (pdesc, ptl, ptl_peer, sendreq,
|
||||
offset, size, flags);
|
||||
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & pdesc->main_dma);
|
||||
@ -354,8 +420,7 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc,
|
||||
/* Insert desc into the list of outstanding DMA's */
|
||||
ompi_list_append (&ptl->queue->put_desc, (ompi_list_item_t *) desc);
|
||||
} else {
|
||||
ompi_output (0,
|
||||
"Other types of DMA are not supported right now \n");
|
||||
ompi_output (0, "Other types of DMA are not supported right now \n");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -376,8 +441,6 @@ static void
|
||||
mca_ptl_elan_data_frag (struct mca_ptl_elan_module_t *ptl,
|
||||
mca_ptl_base_header_t * header)
|
||||
{
|
||||
/* For PML interfacing, refer to mca_ptl_tcp_recv_frag_match(frag, sd);*/
|
||||
|
||||
/* Allocate a recv frag descriptor */
|
||||
mca_ptl_elan_recv_frag_t *recv_frag;
|
||||
ompi_list_item_t *item;
|
||||
@ -613,6 +676,7 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
|
||||
req = desc->desc->req;
|
||||
header = (mca_ptl_base_header_t *)&
|
||||
((ompi_ptl_elan_qdma_desc_t *)desc->desc)->buff[0];
|
||||
|
||||
if (CHECK_ELAN) {
|
||||
char hostname[32];
|
||||
gethostname(hostname, 32);
|
||||
@ -625,6 +689,15 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
|
||||
header->hdr_common.hdr_size);
|
||||
}
|
||||
mca_ptl_elan_send_desc_done (desc, req);
|
||||
|
||||
/* Remember to reset the events */
|
||||
INITEVENT_WORD (ctx,
|
||||
(EVENT *) & desc->elan_data_event->event32,
|
||||
&desc->main_doneWord);
|
||||
RESETEVENT_WORD (&desc->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx,
|
||||
(EVENT *) & desc->elan_data_event->event32, 1);
|
||||
|
||||
} else {
|
||||
/* XXX: Stop at any incomplete send desc */
|
||||
break;
|
||||
|
@ -76,7 +76,8 @@ enum {
|
||||
/* the first four bits for type */
|
||||
MCA_PTL_ELAN_DESC_NULL = 0x00,
|
||||
MCA_PTL_ELAN_DESC_QDMA = 0x01,
|
||||
MCA_PTL_ELAN_DESC_PUTGET = 0x02,
|
||||
MCA_PTL_ELAN_DESC_PUT = 0x02,
|
||||
MCA_PTL_ELAN_DESC_GET = 0x04,
|
||||
/* the next four bits for status */
|
||||
MCA_PTL_ELAN_DESC_LOCAL = 0x10,
|
||||
MCA_PTL_ELAN_DESC_CACHED = 0x20
|
||||
@ -112,24 +113,19 @@ struct ompi_ptl_elan_recv_queue_t {
|
||||
};
|
||||
typedef struct ompi_ptl_elan_recv_queue_t ompi_ptl_elan_recv_queue_t;
|
||||
|
||||
typedef struct {
|
||||
/* SHOULD BE 128-byte aligned
|
||||
* queue req data packet */
|
||||
/*uint8_t data[INPUT_QUEUE_MAX]; For NIC-based tag-matching*/
|
||||
/* SHOULD be 32-byte aligned */
|
||||
E4_Event32 event32; /* Local elan completion event */
|
||||
} ompi_elan_event_t;
|
||||
|
||||
/**
|
||||
* ELAN descriptor for send
|
||||
*/
|
||||
#define ELAN_BASE_DESC_FIELDS \
|
||||
E4_DMA64 main_dma; /**< Must be 8-byte aligned */ \
|
||||
E4_DMA64 main_dma; /**< 8-byte aligned */ \
|
||||
/* 8 byte aligned */ \
|
||||
volatile E4_uint64 main_doneWord; \
|
||||
/* 8 byte aligned */ \
|
||||
ompi_elan_event_t *elan_data_event; \
|
||||
E4_Event32 *elan_event; \
|
||||
uint8_t *desc_buff; \
|
||||
/* 8 byte aligned */ \
|
||||
mca_pml_base_send_request_t *req; \
|
||||
mca_ptl_elan_module_t *ptl; \
|
||||
/* 8 byte aligned */ \
|
||||
int desc_type; \
|
||||
int desc_status; \
|
||||
@ -145,21 +141,14 @@ struct ompi_ptl_elan_qdma_desc_t {
|
||||
|
||||
ELAN_BASE_DESC_FIELDS
|
||||
/* 8 byte aligned */
|
||||
|
||||
mca_ptl_elan_module_t *ptl;
|
||||
RAIL *rail;
|
||||
/* 8 byte aligned */
|
||||
|
||||
uint8_t buff[INPUT_QUEUE_MAX]; /**< queue data */
|
||||
/* 8 byte aligned */
|
||||
//ompi_convertor_t frag_convertor; /**< datatype convertor */
|
||||
};
|
||||
typedef struct ompi_ptl_elan_qdma_desc_t ompi_ptl_elan_qdma_desc_t;
|
||||
|
||||
struct ompi_ptl_elan_queue_ctrl_t {
|
||||
|
||||
/* Transmit Queues */
|
||||
/** < elan located INPUT_QUEUE_ALIGN'ed with INPUT_QUEUE_SIZE */
|
||||
/** <Elan located INPUT_QUEUE_ALIGN'ed with INPUT_QUEUE_SIZE */
|
||||
E4_InputQueue *input;
|
||||
|
||||
/** <transmit queue structures */
|
||||
@ -167,7 +156,6 @@ struct ompi_ptl_elan_queue_ctrl_t {
|
||||
E4_CmdQ *tx_cmdq;
|
||||
ELAN4_COOKIEPOOL *tx_cpool;
|
||||
ompi_event_t *tx_events;
|
||||
|
||||
ompi_list_t tx_desc;
|
||||
ompi_free_list_t tx_desc_free;
|
||||
|
||||
@ -177,10 +165,6 @@ struct ompi_ptl_elan_queue_ctrl_t {
|
||||
int rx_slotsize;
|
||||
int rx_nslots;
|
||||
|
||||
/*Automatic progression */
|
||||
void (*rx_fn) (void);
|
||||
void *rx_handle;
|
||||
|
||||
/* Recv Queue has to be well-aligned */
|
||||
ompi_ptl_elan_recv_queue_t *rxq;
|
||||
};
|
||||
@ -191,12 +175,15 @@ struct ompi_ptl_elan_putget_desc_t {
|
||||
ELAN_BASE_DESC_FIELDS
|
||||
/* 8 byte aligned */
|
||||
|
||||
mca_ptl_elan_module_t *ptl;
|
||||
RAIL *rail;
|
||||
E4_DMA64 chain_dma; /**< Must be 8-byte aligned */
|
||||
/* 8 byte aligned */
|
||||
volatile E4_uint64 chain_doneWord;
|
||||
/* 8 byte aligned */
|
||||
E4_Event32 *chain_event; /* E4_Event plus pad */
|
||||
E4_Addr *chain_buff;
|
||||
|
||||
uint8_t *src_elan_addr;
|
||||
uint8_t *dst_elan_addr;
|
||||
E4_Addr src_elan_addr;
|
||||
E4_Addr dst_elan_addr;
|
||||
/* 8 byte aligned */
|
||||
};
|
||||
typedef struct ompi_ptl_elan_putget_desc_t ompi_ptl_elan_putget_desc_t;
|
||||
@ -204,11 +191,11 @@ typedef struct ompi_ptl_elan_putget_desc_t ompi_ptl_elan_putget_desc_t;
|
||||
struct ompi_ptl_elan_putget_ctrl_t {
|
||||
|
||||
/** <transmit queue structures */
|
||||
u_int putget_throttle;
|
||||
int putget_retryCount;
|
||||
int putget_evictCache;
|
||||
int32_t putget_waitType;
|
||||
ELAN_FLAGS putget_flags;
|
||||
u_int pg_throttle;
|
||||
int pg_retryCount;
|
||||
int pg_evictCache;
|
||||
int32_t pg_waitType;
|
||||
ELAN_FLAGS pg_flags;
|
||||
|
||||
E4_CmdQ *put_cmdq;
|
||||
E4_CmdQ *get_cmdq;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user