
get chained DMA done, to test and add threading support

This commit was SVN r1913.
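
For context on the change below: the "chained DMA" finished here is a put/get send path in which a small queue DMA carrying the fragment header is chained off the completion event of the main payload DMA, so the receiver only sees the header after the data has landed. A minimal sketch of that chaining pattern in plain C follows; the types and the setup_chained_put() helper are illustrative stand-ins, not the Quadrics libelan4 API or the OMPI descriptors patched below.

/* Illustrative sketch only (plain C stand-ins, not the libelan4 API):
 * models the "main DMA -> completion event -> chained header QDMA" pattern. */
#include <stdint.h>
#include <stdio.h>

typedef struct {                /* stand-in for an E4_DMA64 descriptor */
    uint64_t src, dst, len;
    uint64_t src_event;         /* event fired when this DMA completes */
} fake_dma_t;

typedef struct {                /* stand-in for a put/get send descriptor */
    fake_dma_t main_dma;        /* moves the payload to the remote node  */
    fake_dma_t chain_dma;       /* queue DMA that delivers the header    */
    uint64_t   chain_event;     /* event that launches chain_dma         */
} fake_putget_desc_t;

/* Chain the header QDMA off the payload DMA: the main DMA's completion
 * event triggers chain_dma, which writes the fragment header into the
 * receiver's input queue, so the header never overtakes the data. */
static void setup_chained_put(fake_putget_desc_t *d,
                              uint64_t src, uint64_t dst, uint64_t len,
                              uint64_t hdr, uint64_t hdr_len,
                              uint64_t remote_inputq)
{
    d->chain_dma = (fake_dma_t){ .src = hdr, .dst = remote_inputq,
                                 .len = hdr_len, .src_event = 0 };
    /* In the real code this is an Elan copy-event that copies chain_dma
     * into a command queue; here we just record where it lives. */
    d->chain_event = (uint64_t)(uintptr_t) &d->chain_dma;
    d->main_dma = (fake_dma_t){ .src = src, .dst = dst, .len = len,
                                .src_event = d->chain_event };
}

int main(void)
{
    fake_putget_desc_t d;
    setup_chained_put(&d, 0x1000, 0x2000, 4096, 0x3000, 64, 0x4000);
    printf("main DMA fires event %#llx -> header QDMA to %#llx\n",
           (unsigned long long) d.main_dma.src_event,
           (unsigned long long) d.chain_dma.dst);
    return 0;
}

The real ompi_ptl_elan_putget_desc_t below carries the same three pieces: a main_dma, a chain_dma that is copied into an Elan-visible chain buffer, and a chain_event that launches it.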
This commit is contained in:
Weikuan Yu 2004-08-05 20:57:25 +00:00
parent a9e22a500d
commit f4ac830175
5 changed files with 199 additions and 111 deletions

View file

@@ -245,7 +245,8 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl,
     if (offset == 0) { /* The first fragment uses a cached desc */
         desc = ((mca_ptl_elan_send_request_t*)sendreq)->req_frag;
     } else {
-        desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, 0);
+        desc = mca_ptl_elan_alloc_send_desc(ptl,
+                sendreq, MCA_PTL_ELAN_DESC_QDMA);
         if (NULL == desc) {
             ompi_output(0,
                 "[%s:%d] Unable to allocate an elan send descriptors \n",
@@ -288,7 +289,7 @@ mca_ptl_elan_put (struct mca_ptl_base_module_t *ptl,
     START_FUNC();
-    desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, 1);
+    desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, MCA_PTL_ELAN_DESC_PUT);
     if (NULL == desc) {
         ompi_output(0,
             "[%s:%d] Unable to allocate an elan send descriptors \n",
@@ -317,7 +318,30 @@ mca_ptl_elan_get (struct mca_ptl_base_module_t *ptl,
                   size_t size,
                   int flags)
 {
-    return OMPI_SUCCESS;
+    int rc = OMPI_SUCCESS;
+    mca_ptl_elan_send_frag_t *desc;
+
+    /* XXX: The address passed down from the PML carries no Elan
+     * information, so this interface still needs to change. */
+    START_FUNC();
+    desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, MCA_PTL_ELAN_DESC_GET);
+    if (NULL == desc) {
+        ompi_output(0,
+                "[%s:%d] Unable to allocate an elan send descriptors \n",
+                __FILE__, __LINE__);
+    }
+
+    rc = mca_ptl_elan_start_desc(desc,
+            (struct mca_ptl_elan_peer_t *)ptl_peer,
+            sendreq, offset, &size, flags);
+
+    /* Update all the sends until the put is done */
+    END_FUNC();
+    return rc;
 }

 /*

View file

@@ -97,12 +97,13 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
     {
         desc->main_dma.dma_dstAddr = 0;
         desc->main_dma.dma_srcEvent =
-            SDRAM2ELAN (ctx, &elan_ptr->event32);
+            SDRAM2ELAN (ctx, &desc->elan_data_event->event32);
         desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input);
-        INITEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32,
+        INITEVENT_WORD (ctx, (EVENT *) & desc->elan_data_event->event32,
                 &desc->main_doneWord);
         RESETEVENT_WORD (&desc->main_doneWord);
-        PRIMEEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32, 1);
+        PRIMEEVENT_WORD (ctx,
+                (EVENT *) & desc->elan_data_event->event32, 1);
     }
     item = (ompi_list_item_t *) frag;

View file

@@ -83,7 +83,7 @@ extern mca_ptl_elan_state_t mca_ptl_elan_global_state;
 mca_ptl_elan_send_frag_t *
 mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr,
                               struct mca_pml_base_send_request_t *sendreq,
-                              int oneside)
+                              int desc_type)
 {
     ompi_free_list_t *flist;
@@ -93,11 +93,18 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr,
     START_FUNC();
     /* For now, bind to queue DMA directly */
-    if (oneside) {
-        /*struct mca_ptl_elan_peer_t *peer;*/
-        flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->putget)->tx_desc_free;
-    } else {
+    if (desc_type & MCA_PTL_ELAN_DESC_QDMA) {
         flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->queue)->tx_desc_free;
+    } else if (desc_type & MCA_PTL_ELAN_DESC_PUT) {
+        flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->putget)->put_desc_free;
+    } else if (desc_type & MCA_PTL_ELAN_DESC_GET) {
+        /*struct mca_ptl_elan_peer_t *peer;*/
+        flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->putget)->get_desc_free;
+    } else {
+        ompi_output (0,
+                "[%s:%d] Error: unknown descriptor type\n",
+                __FILE__, __LINE__);
+        return NULL;
     }
     if (ompi_using_threads ()) {
@@ -134,11 +141,7 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr,
     desc = (mca_ptl_elan_send_frag_t *) item;
     desc->desc->req = (struct mca_ptl_elan_send_request_t *) sendreq;
-    if (oneside) {
-        desc->desc->desc_type = MCA_PTL_ELAN_DESC_PUTGET;
-    } else {
-        desc->desc->desc_type = MCA_PTL_ELAN_DESC_QDMA;
-    }
+    desc->desc->desc_type = desc_type;
     END_FUNC();
     return desc;

View file

@@ -228,18 +228,31 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
     struct ompi_ptl_elan_putget_desc_t * desc;
     mca_ptl_base_header_t *hdr;

     START_FUNC();
     desc = (ompi_ptl_elan_putget_desc_t *)frag->desc;
     destvp = ptl_peer->peer_vp;
     size_in = *size;

+    hdr = &sendfrag->frag_base.frag_header;
+    hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
+    hdr->hdr_common.hdr_flags = flags;
+    hdr->hdr_common.hdr_size = sizeof(mca_ptl_base_frag_header_t);
+    hdr->hdr_frag.hdr_frag_offset = offset;
+    hdr->hdr_frag.hdr_frag_seq = 0;
+    hdr->hdr_frag.hdr_src_ptr.lval = 0;
+    hdr->hdr_frag.hdr_src_ptr.pval = frag;
+    hdr->hdr_frag.hdr_dst_ptr = pml_req->req_peer_match;
+
+    desc->src_elan_addr = MAIN2ELAN (desc->rail->r_ctx,
+            pml_req->req_base.req_addr);
+    desc->dst_elan_addr = (E4_Addr)pml_req->req_peer_addr;
+
+#define PUT_NON_CONTIGUOUS_DATA 0
     /* initialize convertor */
-    if(size_in > 0) {
+    if(size_in > 0 && PUT_NON_CONTIGUOUS_DATA) {
         struct iovec iov;
         ompi_convertor_t *convertor;
@@ -260,8 +273,12 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
         /* For now, eager sends are always packed into the descriptor
          * TODO: Inline up to 256 bytes (including the header), then
          * do a chained send for mesg < first_frag_size */
-        iov.iov_base = &desc->buff[header_length];
+        desc->src_elan_addr = elan4_main2elan(ptl->ptl_elan_ctx,
+                desc->desc_buff);
+        iov.iov_base = desc->desc_buff;
         iov.iov_len = size_in;
         rc = ompi_convertor_pack(convertor, &iov, 1);
         if (rc < 0) {
             ompi_output (0, "[%s:%d] Unable to pack data\n",
@@ -274,18 +291,67 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
     }
     *size = size_out;
     hdr->hdr_frag.hdr_frag_length = size_out;
-    desc->main_dma.dma_srcAddr = desc->src_elan_addr;
-    desc->main_dma.dma_srcAddr = desc->dst_elan_addr;

     /* XXX: no additional flags for the DMA, remote, shmem, qwrite,
      * broadcast, etc */
     flags = 0;

+#define MCA_PTL_ELAN_USE_CHAINED_DMA 0
+#if defined(MCA_PTL_ELAN_USE_CHAINED_DMA)
-    /* Setup a chained DMA
-     * FIXME: remember
-     */
+    /* Setup the chain dma */
+    desc->chain_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
+            sizeof(mca_ptl_base_frag_header_t),
+            DMA_DataTypeByte, DMA_QueueWrite, 8);
+    desc->chain_dma.dma_cookie = elan4_local_cookie(ptl->putget->pg_cpool,
+            E4_COOKIE_TYPE_LOCAL_DMA, destvp);
+    desc->chain_dma.dma_vproc = destvp;
+    desc->chain_dma.dma_srcAddr = elan4_main2elan (ctx, (void *) hdr);
+    desc->chain_dma.dma_dstAddr = 0x0ULL;
+    desc->chain_dma.dma_srcEvent = SDRAM2ELAN (ctx,
+            &desc->elan_data_event->event32);
+
+    /* causes the inputter to redirect the dma to the inputq */
+    desc->chain_dma.dma_dstEvent = elan4_main2elan (ctx,
+            (void *) ptl->queue->input);
+
+    INITEVENT_WORD (ctx, (EVENT *) & desc->elan_data_event->event32,
+            &desc->main_doneWord);
+    RESETEVENT_WORD (&desc->main_doneWord);
+    PRIMEEVENT_WORD (ctx,
+            (EVENT *) & desc->elan_data_event->event32, 1);
+
+    desc->chain_dma.dma_typeSize |= RUN_DMA_CMD;
+    desc->chain_dma.dma_pad = NOP_CMD;
+
+    /* Copy down the chain dma to the chain buffer in elan sdram */
+    memcpy ((void *) desc->chain_buf, (void *) &desc->chain_dma,
+            sizeof (E4_DMA64));
+
+    desc->chain_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
+            E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
+    desc->chain_event->ev_Params[0] = elan4_main2elan (ctx,
+            (void *) desc->chain_buf);
+
+    /* XXX:
+     * The chain dma will go directly into a command stream,
+     * so we need to append the command-queue control bits.
+     * Allocate space from command queues hung off the CTX.
+     */
+    desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
+#endif
+
+    desc->main_dma.dma_srcAddr = desc->src_elan_addr;
+    desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
+
+    /* Chain an event */
+    desc->main_dma.dma_srcEvent = elan4_main2elan(ctx, desc->chain_event);
+    desc->main_dma.dma_dstEvent = 0x0ULL; /* disable remote event */
+
     /* XXX: Hardcoded DMA retry count */
-    desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
-            (header_length + size_out), DMA_DataTypeByte, flags,
-            putget->pg_retryCount);
+    desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (size_out,
+            DMA_DataTypeByte, flags, putget->pg_retryCount);

     /* Just a normal DMA, no need to have additional flags */
     desc->main_dma.dma_cookie = elan4_local_cookie (
@@ -304,10 +370,8 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag,
                 hdr->hdr_common.hdr_size);
     }

     /* Make main memory coherent with IO domain (IA64) */
     MEMBAR_VISIBLE ();
-    /*elan4_run_dma_cmd(cmdq, (E4_DMA *)&pd->pd_dma);*/
     END_FUNC();
 }
@@ -345,6 +409,8 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc,
         struct ompi_ptl_elan_putget_desc_t *pdesc;

         pdesc = (ompi_ptl_elan_putget_desc_t *)desc->desc;
+
+        /* For each put/get descriptor, a QDMA is chained off. */
         mca_ptl_elan_init_putget_desc (pdesc, ptl, ptl_peer, sendreq,
                 offset, size, flags);
         elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & pdesc->main_dma);
@@ -354,8 +420,7 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc,
         /* Insert desc into the list of outstanding DMA's */
         ompi_list_append (&ptl->queue->put_desc, (ompi_list_item_t *) desc);
     } else {
-        ompi_output (0,
-                "Other types of DMA are not supported right now \n");
+        ompi_output (0, "Other types of DMA are not supported right now \n");
         return OMPI_ERROR;
     }
@@ -376,8 +441,6 @@ static void
 mca_ptl_elan_data_frag (struct mca_ptl_elan_module_t *ptl,
                         mca_ptl_base_header_t * header)
 {
-    /* For PML interfacing, refer to mca_ptl_tcp_recv_frag_match(frag, sd);*/
     /* Allocate a recv frag descriptor */
     mca_ptl_elan_recv_frag_t *recv_frag;
     ompi_list_item_t *item;
@@ -613,6 +676,7 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
             req = desc->desc->req;
             header = (mca_ptl_base_header_t *)&
                 ((ompi_ptl_elan_qdma_desc_t *)desc->desc)->buff[0];
+
             if (CHECK_ELAN) {
                 char hostname[32];
                 gethostname(hostname, 32);
@@ -625,6 +689,15 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
                         header->hdr_common.hdr_size);
             }
             mca_ptl_elan_send_desc_done (desc, req);
+
+            /* Remember to reset the events */
+            INITEVENT_WORD (ctx,
+                    (EVENT *) & desc->elan_data_event->event32,
+                    &desc->main_doneWord);
+            RESETEVENT_WORD (&desc->main_doneWord);
+            PRIMEEVENT_WORD (ctx,
+                    (EVENT *) & desc->elan_data_event->event32, 1);
         } else {
             /* XXX: Stop at any incomplete send desc */
             break;

View file

@@ -76,7 +76,8 @@ enum {
     /* the first four bits for type */
     MCA_PTL_ELAN_DESC_NULL   = 0x00,
     MCA_PTL_ELAN_DESC_QDMA   = 0x01,
-    MCA_PTL_ELAN_DESC_PUTGET = 0x02,
+    MCA_PTL_ELAN_DESC_PUT    = 0x02,
+    MCA_PTL_ELAN_DESC_GET    = 0x04,
     /* next first four bits for status */
     MCA_PTL_ELAN_DESC_LOCAL  = 0x10,
     MCA_PTL_ELAN_DESC_CACHED = 0x20
@@ -112,24 +113,19 @@ struct ompi_ptl_elan_recv_queue_t {
 };
 typedef struct ompi_ptl_elan_recv_queue_t ompi_ptl_elan_recv_queue_t;

 typedef struct {
-    /* SHOULD BE 128-byte aligned
-     * queue req data packet */
-    /*uint8_t data[INPUT_QUEUE_MAX]; For NIC-based tag-matching*/
+    /* SHOULD be 32-byte aligned */
     E4_Event32 event32;            /* Local elan completion event */
 } ompi_elan_event_t;

 /**
  * ELAN descriptor for send
  */
 #define ELAN_BASE_DESC_FIELDS \
-    E4_DMA64 main_dma;             /**< Must be 8-byte aligned */ \
+    E4_DMA64 main_dma;             /**< 8-byte aligned */ \
     /* 8 byte aligned */ \
     volatile E4_uint64 main_doneWord; \
     /* 8 byte aligned */ \
+    ompi_elan_event_t *elan_data_event; \
-    E4_Event32 *elan_event; \
+    uint8_t *desc_buff; \
     /* 8 byte aligned */ \
     mca_pml_base_send_request_t *req; \
+    mca_ptl_elan_module_t *ptl; \
     /* 8 byte aligned */ \
     int desc_type; \
     int desc_status; \
@@ -145,21 +141,14 @@ struct ompi_ptl_elan_qdma_desc_t {
     ELAN_BASE_DESC_FIELDS
     /* 8 byte aligned */
-    mca_ptl_elan_module_t *ptl;
     RAIL *rail;
     /* 8 byte aligned */
     uint8_t buff[INPUT_QUEUE_MAX];     /**< queue data */
     /* 8 byte aligned */
-    //ompi_convertor_t frag_convertor; /**< datatype convertor */
 };
 typedef struct ompi_ptl_elan_qdma_desc_t ompi_ptl_elan_qdma_desc_t;

 struct ompi_ptl_elan_queue_ctrl_t {
     /* Transmit Queues */
-    /** < elan located INPUT_QUEUE_ALIGN'ed with INPUT_QUEUE_SIZE */
+    /** <Elan located INPUT_QUEUE_ALIGN'ed with INPUT_QUEUE_SIZE */
     E4_InputQueue *input;

     /** <transmit queue structures */
@@ -167,7 +156,6 @@ struct ompi_ptl_elan_queue_ctrl_t {
     E4_CmdQ *tx_cmdq;
     ELAN4_COOKIEPOOL *tx_cpool;
-    ompi_event_t *tx_events;
     ompi_list_t tx_desc;
     ompi_free_list_t tx_desc_free;
@@ -177,10 +165,6 @@ struct ompi_ptl_elan_queue_ctrl_t {
     int rx_slotsize;
     int rx_nslots;
-    /*Automatic progression */
-    void (*rx_fn) (void);
-    void *rx_handle;

     /* Recv Queue has to be well-aligned */
     ompi_ptl_elan_recv_queue_t *rxq;
 };
@@ -191,12 +175,15 @@ struct ompi_ptl_elan_putget_desc_t {
     ELAN_BASE_DESC_FIELDS
     /* 8 byte aligned */
-    mca_ptl_elan_module_t *ptl;
     RAIL *rail;
+    E4_DMA64 chain_dma;            /**< Must be 8-byte aligned */
     /* 8 byte aligned */
+    volatile E4_uint64 chain_doneWord;
     /* 8 byte aligned */
+    E4_Event32 *chain_event;       /* E4_Event plus pad */
+    E4_Addr    *chain_buff;
-    uint8_t *src_elan_addr;
-    uint8_t *dst_elan_addr;
+    E4_Addr     src_elan_addr;
+    E4_Addr     dst_elan_addr;
     /* 8 byte aligned */
 };
 typedef struct ompi_ptl_elan_putget_desc_t ompi_ptl_elan_putget_desc_t;
@@ -204,11 +191,11 @@ typedef struct ompi_ptl_elan_putget_desc_t ompi_ptl_elan_putget_desc_t;
 struct ompi_ptl_elan_putget_ctrl_t {

     /** <transmit queue structures */
-    u_int      putget_throttle;
-    int        putget_retryCount;
-    int        putget_evictCache;
-    int32_t    putget_waitType;
-    ELAN_FLAGS putget_flags;
+    u_int      pg_throttle;
+    int        pg_retryCount;
+    int        pg_evictCache;
+    int32_t    pg_waitType;
+    ELAN_FLAGS pg_flags;

     E4_CmdQ *put_cmdq;
     E4_CmdQ *get_cmdq;