-- Extend ptl_base_ack_header to be mca_ptl_elan_ack_header with
a pointer to the fragment. -- Done with shared send completion queue support. -- To test thread-based send/recv completion checking. Points to be aware of: i) Expand the control message header type and optimize completion checking. ii) Introduce an additional control message for management functionality. This commit was SVN r2389.
Этот коммит содержится в:
родитель
4ccbabd6df
Коммит
c9badff0c9
@ -246,9 +246,10 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
START_FUNC(PTL_ELAN_DEBUG_SEND);
|
START_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||||
|
|
||||||
{
|
{
|
||||||
/* FIXME: YUW, remove this block */
|
/* FIXME: YUW, remove this block */
|
||||||
fprintf(stderr, "[proc%s:%s:%d] here\n",
|
fprintf(stderr, "[proc%s:%s:%d] here \n",
|
||||||
getenv("RMS_RANK"), __FILE__, __LINE__);
|
getenv("RMS_RANK"), __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -366,7 +367,6 @@ mca_ptl_elan_get (struct mca_ptl_base_module_t *ptl,
|
|||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*rc = mca_ptl_elan_start_desc(desc, */
|
|
||||||
rc = mca_ptl_elan_start_get(desc, (struct mca_ptl_elan_peer_t *)ptl_peer,
|
rc = mca_ptl_elan_start_get(desc, (struct mca_ptl_elan_peer_t *)ptl_peer,
|
||||||
req, offset, &size, flags);
|
req, offset, &size, flags);
|
||||||
|
|
||||||
@ -400,19 +400,6 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl,
|
|||||||
request = frag->frag_request;
|
request = frag->frag_request;
|
||||||
recv_frag = (mca_ptl_elan_recv_frag_t * ) frag;
|
recv_frag = (mca_ptl_elan_recv_frag_t * ) frag;
|
||||||
|
|
||||||
/* FIXME + TODO: Optimized processing fragments
|
|
||||||
* Pseudocode, for additional processing of fragments
|
|
||||||
* a) (ACK:no, Get:No)
|
|
||||||
* Remove the frag. no need for further processing
|
|
||||||
* b) (ACK:yes, Get:No)
|
|
||||||
* Send an ACK only
|
|
||||||
* c) (ACK:yes, Get:yes)
|
|
||||||
* Get a message, update the fragment descriptor and
|
|
||||||
* then send an ACK,
|
|
||||||
* d) Consider moving time-consuming tasks to some BH-like
|
|
||||||
* mechanisms.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) {
|
if (header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) {
|
||||||
int desc_type ;
|
int desc_type ;
|
||||||
/* Basic ACK scheme following TCP cases */
|
/* Basic ACK scheme following TCP cases */
|
||||||
@ -450,7 +437,8 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl,
|
|||||||
#if 1
|
#if 1
|
||||||
set = fetchNset (&((mca_ptl_elan_recv_frag_t *)frag)->frag_progressed, 1);
|
set = fetchNset (&((mca_ptl_elan_recv_frag_t *)frag)->frag_progressed, 1);
|
||||||
#else
|
#else
|
||||||
set = ompi_atomic_fetch_and_set_int (&((mca_ptl_elan_recv_frag_t *)frag)->frag_progressed, 1);
|
set = ompi_atomic_fetch_and_set_int (
|
||||||
|
&((mca_ptl_elan_recv_frag_t *)frag)->frag_progressed, 1);
|
||||||
#endif
|
#endif
|
||||||
if (!set) {
|
if (!set) {
|
||||||
/* IN TCP case, IO_VEC is first allocated.
|
/* IN TCP case, IO_VEC is first allocated.
|
||||||
@ -482,39 +470,10 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl,
|
|||||||
header->hdr_frag.hdr_frag_offset);
|
header->hdr_frag.hdr_frag_offset);
|
||||||
ompi_convertor_unpack(&frag->frag_base.frag_convertor, &iov, 1);
|
ompi_convertor_unpack(&frag->frag_base.frag_convertor, &iov, 1);
|
||||||
}
|
}
|
||||||
#if 0
|
|
||||||
if (header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) {
|
|
||||||
/* FIXME: Pseudocode, for additional processing of fragments
|
|
||||||
* a) (ACK:no, Get:No)
|
|
||||||
* Remove the frag. no need for further processing
|
|
||||||
* b) (ACK:yes, Get:No)
|
|
||||||
* Send an ACK only
|
|
||||||
* c) (ACK:yes, Get:yes)
|
|
||||||
* Get a message, update the fragment descriptor and
|
|
||||||
* then send an ACK,
|
|
||||||
* d) Consider moving time-consuming tasks to some BH-like
|
|
||||||
* mechanisms.
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
frag->frag_base.frag_owner->ptl_recv_progress (
|
|
||||||
frag->frag_base.frag_owner,
|
|
||||||
request,
|
|
||||||
frag->frag_base.frag_size,
|
|
||||||
frag->frag_base.frag_size);
|
|
||||||
|
|
||||||
/* FIXME:
|
|
||||||
* To support the required ACK, do not return
|
|
||||||
* until the ack is out */
|
|
||||||
if (((mca_ptl_elan_recv_frag_t *) frag)->frag_ack_pending == false)
|
|
||||||
mca_ptl_elan_recv_frag_return (frag->frag_base.frag_owner,
|
|
||||||
(mca_ptl_elan_recv_frag_t *) frag);
|
|
||||||
#else
|
|
||||||
/* XXX: progress the request based on the status of this recv frag
|
/* XXX: progress the request based on the status of this recv frag
|
||||||
* It is possible to employ a scheduling logic here.
|
* It is possible to employ a scheduling logic here.
|
||||||
* Then Done with this fragment, i.e., data */
|
* Then Done with this fragment, i.e., data */
|
||||||
mca_ptl_elan_recv_frag_done (header, frag, request);
|
mca_ptl_elan_recv_frag_done (header, frag, request);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,7 +92,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
|||||||
#if OMPI_PTL_ELAN_COMP_QUEUE
|
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||||
/* XXX: provide a DMA structure for each chained event */
|
/* XXX: provide a DMA structure for each chained event */
|
||||||
desc->comp_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
desc->comp_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||||
sizeof(mca_ptl_base_frag_header_t),
|
sizeof(mca_ptl_base_header_t),
|
||||||
DMA_DataTypeByte, DMA_QueueWrite, 8);
|
DMA_DataTypeByte, DMA_QueueWrite, 8);
|
||||||
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
|
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
|
||||||
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
|
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
|
||||||
@ -120,6 +120,10 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
|||||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx,
|
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx,
|
||||||
(E4_Event *)desc->comp_event);
|
(E4_Event *)desc->comp_event);
|
||||||
desc->main_dma.dma_dstEvent= SDRAM2ELAN (ctx, queue->input);
|
desc->main_dma.dma_dstEvent= SDRAM2ELAN (ctx, queue->input);
|
||||||
|
|
||||||
|
LOG_PRINT(PTL_ELAN_DEBUG_NONE,
|
||||||
|
"desc %p comp_buff %p elan_event %p comp_event %p \n",
|
||||||
|
desc, desc->comp_buff, desc->elan_event, desc->comp_event);
|
||||||
#else
|
#else
|
||||||
/* Initialize some of the dma structures */
|
/* Initialize some of the dma structures */
|
||||||
desc->main_dma.dma_dstAddr = 0;
|
desc->main_dma.dma_dstAddr = 0;
|
||||||
@ -145,7 +149,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
mca_ptl_elan_putget_desc_contruct (
|
mca_ptl_elan_putget_desc_construct (
|
||||||
mca_ptl_elan_module_t * ptl,
|
mca_ptl_elan_module_t * ptl,
|
||||||
ompi_ptl_elan_putget_desc_t *desc,
|
ompi_ptl_elan_putget_desc_t *desc,
|
||||||
EVENT *elan_event,
|
EVENT *elan_event,
|
||||||
@ -170,9 +174,15 @@ mca_ptl_elan_putget_desc_contruct (
|
|||||||
desc->comp_event= (E4_Event *) ((char *)elan_event
|
desc->comp_event= (E4_Event *) ((char *)elan_event
|
||||||
+ 2 * ELAN_BLOCK_SIZE + 2 * sizeof (E4_Event32));
|
+ 2 * ELAN_BLOCK_SIZE + 2 * sizeof (E4_Event32));
|
||||||
|
|
||||||
|
LOG_PRINT(PTL_ELAN_DEBUG_NONE,
|
||||||
|
"desc %p chain_buff %p comp_buff %p elan_event %p "
|
||||||
|
" chain_event %p comp_event %p \n",
|
||||||
|
desc, desc->chain_buff, desc->comp_buff, desc->elan_event,
|
||||||
|
desc->chain_event, desc->comp_event);
|
||||||
|
|
||||||
/* XXX: provide a DMA structure for each chained event */
|
/* XXX: provide a DMA structure for each chained event */
|
||||||
desc->comp_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
desc->comp_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||||
sizeof(mca_ptl_base_frag_header_t),
|
sizeof(mca_ptl_base_header_t),
|
||||||
DMA_DataTypeByte, DMA_QueueWrite, 8);
|
DMA_DataTypeByte, DMA_QueueWrite, 8);
|
||||||
desc->comp_dma.dma_vproc = ptl->elan_vp;
|
desc->comp_dma.dma_vproc = ptl->elan_vp;
|
||||||
desc->comp_dma.dma_srcAddr = 0x0ULL; /* To be filled in */
|
desc->comp_dma.dma_srcAddr = 0x0ULL; /* To be filled in */
|
||||||
@ -196,6 +206,7 @@ mca_ptl_elan_putget_desc_contruct (
|
|||||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx,
|
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx,
|
||||||
(E4_Event *)desc->chain_event);
|
(E4_Event *)desc->chain_event);
|
||||||
desc->main_dma.dma_dstEvent= 0x0ULL;
|
desc->main_dma.dma_dstEvent= 0x0ULL;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
desc->elan_event = elan_event;
|
desc->elan_event = elan_event;
|
||||||
desc->chain_event= (E4_Event32 *)
|
desc->chain_event= (E4_Event32 *)
|
||||||
@ -228,7 +239,7 @@ do { \
|
|||||||
frag->desc = (ompi_ptl_elan_base_desc_t *)dp; \
|
frag->desc = (ompi_ptl_elan_base_desc_t *)dp; \
|
||||||
\
|
\
|
||||||
/* Initialize some of the dma structures */ \
|
/* Initialize some of the dma structures */ \
|
||||||
mca_ptl_elan_putget_desc_contruct (ptl, dp, \
|
mca_ptl_elan_putget_desc_construct (ptl, dp, \
|
||||||
eptr, 0, 0, local); \
|
eptr, 0, 0, local); \
|
||||||
\
|
\
|
||||||
item = (ompi_list_item_t *) frag; \
|
item = (ompi_list_item_t *) frag; \
|
||||||
|
@ -22,12 +22,19 @@ struct ompi_ptl_elan_base_desc_t;
|
|||||||
|
|
||||||
struct mca_ptl_elan_send_frag_t {
|
struct mca_ptl_elan_send_frag_t {
|
||||||
mca_ptl_base_frag_t frag_base;
|
mca_ptl_base_frag_t frag_base;
|
||||||
|
struct ompi_ptl_elan_base_desc_t *desc;
|
||||||
volatile int frag_progressed;
|
volatile int frag_progressed;
|
||||||
bool frag_ack_pending; /* Is there an ack to recv */
|
bool frag_ack_pending; /* Is there an ack to recv */
|
||||||
struct ompi_ptl_elan_base_desc_t *desc;
|
|
||||||
};
|
};
|
||||||
typedef struct mca_ptl_elan_send_frag_t mca_ptl_elan_send_frag_t;
|
typedef struct mca_ptl_elan_send_frag_t mca_ptl_elan_send_frag_t;
|
||||||
|
|
||||||
|
/* XXX: Extend the header a bit with an pointer to frag */
|
||||||
|
struct mca_ptl_elan_ack_header_t {
|
||||||
|
struct mca_ptl_base_ack_header_t base_ack; /* 32 bytes */
|
||||||
|
struct mca_ptl_elan_send_frag_t *frag;
|
||||||
|
};
|
||||||
|
typedef struct mca_ptl_elan_ack_header_t mca_ptl_elan_ack_header_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ELAN received fragment derived type.
|
* ELAN received fragment derived type.
|
||||||
*/
|
*/
|
||||||
@ -37,12 +44,6 @@ struct mca_ptl_elan_recv_frag_t {
|
|||||||
size_t frag_msg_cnt;
|
size_t frag_msg_cnt;
|
||||||
volatile int frag_progressed; /* Is it record to request */
|
volatile int frag_progressed; /* Is it record to request */
|
||||||
bool frag_ack_pending; /* Is there an ack to send */
|
bool frag_ack_pending; /* Is there an ack to send */
|
||||||
#if 0
|
|
||||||
union {
|
|
||||||
struct ompi_ptl_elan_qdma_desc_t *qdma;
|
|
||||||
struct ompi_ptl_elan_putget_desc_t *putget;
|
|
||||||
} frag;
|
|
||||||
#endif
|
|
||||||
char *alloc_buff;
|
char *alloc_buff;
|
||||||
char *unex_buff;
|
char *unex_buff;
|
||||||
};
|
};
|
||||||
|
@ -354,6 +354,16 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
|||||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, desc->comp_event);
|
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, desc->comp_event);
|
||||||
desc->main_dma.dma_srcAddr = MAIN2ELAN (ctx, &desc->buff[0]);
|
desc->main_dma.dma_srcAddr = MAIN2ELAN (ctx, &desc->buff[0]);
|
||||||
|
|
||||||
|
LOG_PRINT (PTL_ELAN_DEBUG_SEND,
|
||||||
|
" desc %p comp_buff %p comp_event %p "
|
||||||
|
"comp src_addr %x main dst_addr %x size %d\n",
|
||||||
|
desc,
|
||||||
|
(void *)desc->comp_buff,
|
||||||
|
(void *)desc->comp_event,
|
||||||
|
(int)desc->comp_dma.dma_srcAddr,
|
||||||
|
(int)desc->main_dma.dma_srcAddr,
|
||||||
|
size_out);
|
||||||
#else
|
#else
|
||||||
desc->main_dma.dma_srcAddr = MAIN2ELAN (ctx, &desc->buff[0]);
|
desc->main_dma.dma_srcAddr = MAIN2ELAN (ctx, &desc->buff[0]);
|
||||||
/* XXX: Hardcoded DMA retry count */
|
/* XXX: Hardcoded DMA retry count */
|
||||||
@ -366,6 +376,10 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
|||||||
desc->main_dma.dma_vproc = destvp;
|
desc->main_dma.dma_vproc = destvp;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
LOG_PRINT (PTL_ELAN_DEBUG_SEND,
|
||||||
|
"dest events main %lx \n",
|
||||||
|
desc->main_dma.dma_dstEvent);
|
||||||
|
|
||||||
/* Make main memory coherent with IO domain (IA64) */
|
/* Make main memory coherent with IO domain (IA64) */
|
||||||
MEMBAR_VISIBLE ();
|
MEMBAR_VISIBLE ();
|
||||||
END_FUNC(PTL_ELAN_DEBUG_SEND);
|
END_FUNC(PTL_ELAN_DEBUG_SEND);
|
||||||
@ -471,6 +485,12 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag,
|
|||||||
desc->chain_dma.dma_dstEvent = elan4_main2elan (ctx,
|
desc->chain_dma.dma_dstEvent = elan4_main2elan (ctx,
|
||||||
(void *) ptl->queue->input);
|
(void *) ptl->queue->input);
|
||||||
|
|
||||||
|
LOG_PRINT (PTL_ELAN_DEBUG_PUT,
|
||||||
|
"dest events main %lx chain %lx comp %lx \n",
|
||||||
|
desc->main_dma.dma_dstEvent,
|
||||||
|
desc->chain_dma.dma_dstEvent,
|
||||||
|
desc->comp_dma.dma_dstEvent);
|
||||||
|
|
||||||
#if OMPI_PTL_ELAN_COMP_QUEUE
|
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||||
/* XXX: Chain a QDMA to each queue and
|
/* XXX: Chain a QDMA to each queue and
|
||||||
* Have all the srcEvent fired to the Queue
|
* Have all the srcEvent fired to the Queue
|
||||||
@ -484,6 +504,7 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag,
|
|||||||
desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||||
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||||
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||||
|
|
||||||
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
|
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
|
||||||
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
|
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
|
||||||
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx,
|
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx,
|
||||||
@ -503,13 +524,6 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag,
|
|||||||
desc->chain_dma.dma_typeSize |= RUN_DMA_CMD;
|
desc->chain_dma.dma_typeSize |= RUN_DMA_CMD;
|
||||||
desc->chain_dma.dma_pad = NOP_CMD;
|
desc->chain_dma.dma_pad = NOP_CMD;
|
||||||
|
|
||||||
LOG_PRINT (PTL_ELAN_DEBUG_FLAG,
|
|
||||||
" desc %p chain_buff %p chain_event %p "
|
|
||||||
"src_addr %x dst_addr %x size %d\n",
|
|
||||||
desc, (void *)desc->chain_buff, (void *)desc->chain_event,
|
|
||||||
(int)desc->src_elan_addr,
|
|
||||||
(int)desc->dst_elan_addr, size_out);
|
|
||||||
|
|
||||||
/* Copy down the chain dma to the chain buffer in elan sdram */
|
/* Copy down the chain dma to the chain buffer in elan sdram */
|
||||||
memcpy ((void *)desc->chain_buff, (void *)&desc->chain_dma,
|
memcpy ((void *)desc->chain_buff, (void *)&desc->chain_dma,
|
||||||
sizeof (E4_DMA64));
|
sizeof (E4_DMA64));
|
||||||
@ -521,16 +535,14 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag,
|
|||||||
|
|
||||||
/* XXX: The chain dma will go directly into a command stream
|
/* XXX: The chain dma will go directly into a command stream
|
||||||
* so we need addend the command queue control bits.
|
* so we need addend the command queue control bits.
|
||||||
* Allocate space from command queues hanged off the CTX.
|
* Allocate space from command queues hanged off the CTX. */
|
||||||
*/
|
|
||||||
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||||
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
||||||
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
|
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
|
||||||
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
|
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
|
||||||
|
|
||||||
/* Chain an event */
|
/* Chain an event */
|
||||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx,
|
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, desc->chain_event);
|
||||||
(E4_Event *)desc->chain_event);
|
|
||||||
|
|
||||||
/* FIXME: no additional flags for the DMA, remote, shmem, qwrite,
|
/* FIXME: no additional flags for the DMA, remote, shmem, qwrite,
|
||||||
* broadcast, etc. Be sure to correctly setup a chained DMA. */
|
* broadcast, etc. Be sure to correctly setup a chained DMA. */
|
||||||
@ -543,11 +555,11 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag,
|
|||||||
destvp);
|
destvp);
|
||||||
desc->main_dma.dma_vproc = destvp;
|
desc->main_dma.dma_vproc = destvp;
|
||||||
|
|
||||||
LOG_PRINT(PTL_ELAN_DEBUG_MAC,
|
LOG_PRINT (PTL_ELAN_DEBUG_PUT,
|
||||||
"destvp %d type %d flag %d size %d\n",
|
"chain_event %p param0 %lx param1 %lx \n",
|
||||||
destvp, hdr->hdr_common.hdr_type,
|
desc->chain_event,
|
||||||
hdr->hdr_common.hdr_flags,
|
desc->chain_event->ev_Params[0],
|
||||||
hdr->hdr_common.hdr_size);
|
desc->chain_event->ev_Params[1]);
|
||||||
|
|
||||||
/* Make main memory coherent with IO domain (IA64) */
|
/* Make main memory coherent with IO domain (IA64) */
|
||||||
MEMBAR_VISIBLE ();
|
MEMBAR_VISIBLE ();
|
||||||
@ -878,6 +890,7 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
|||||||
mca_ptl_base_header_t *hdr;
|
mca_ptl_base_header_t *hdr;
|
||||||
mca_pml_base_recv_request_t* request;
|
mca_pml_base_recv_request_t* request;
|
||||||
mca_ptl_elan_module_t *elan_ptl;
|
mca_ptl_elan_module_t *elan_ptl;
|
||||||
|
ELAN4_CTX *ctx;
|
||||||
|
|
||||||
int destvp;
|
int destvp;
|
||||||
|
|
||||||
@ -889,7 +902,8 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
|||||||
elan_ptl = (mca_ptl_elan_module_t *) ptl;
|
elan_ptl = (mca_ptl_elan_module_t *) ptl;
|
||||||
desc->desc->desc_type = MCA_PTL_ELAN_DESC_QDMA;
|
desc->desc->desc_type = MCA_PTL_ELAN_DESC_QDMA;
|
||||||
qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc;
|
qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc;
|
||||||
hdr = (mca_ptl_base_header_t *) & qdma->buff[0];
|
ctx = elan_ptl->ptl_elan_ctx;
|
||||||
|
hdr = &desc->frag_base.frag_header;
|
||||||
request = recv_frag->frag_recv.frag_request;
|
request = recv_frag->frag_recv.frag_request;
|
||||||
|
|
||||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK;
|
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK;
|
||||||
@ -904,27 +918,34 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
|||||||
hdr->hdr_ack.hdr_dst_match.lval = 0;
|
hdr->hdr_ack.hdr_dst_match.lval = 0;
|
||||||
hdr->hdr_ack.hdr_dst_match.pval = request;
|
hdr->hdr_ack.hdr_dst_match.pval = request;
|
||||||
|
|
||||||
/* FIXME: this needs to be some offsete from the base addr */
|
/* FIXME: this needs to be some offsete from the base addr
|
||||||
|
* posted buffer size is the leftover */
|
||||||
hdr->hdr_ack.hdr_dst_addr.pval = 0;
|
hdr->hdr_ack.hdr_dst_addr.pval = 0;
|
||||||
hdr->hdr_ack.hdr_dst_addr.lval = elan4_main2elan(
|
hdr->hdr_ack.hdr_dst_addr.lval = elan4_main2elan(ctx,
|
||||||
elan_ptl->ptl_elan_ctx, request->req_base.req_addr);
|
request->req_base.req_addr);
|
||||||
|
|
||||||
/* FIXME: posted buffer size is the leftover */
|
|
||||||
hdr->hdr_ack.hdr_dst_size =
|
hdr->hdr_ack.hdr_dst_size =
|
||||||
request->req_bytes_packed - request->req_bytes_received;
|
request->req_bytes_packed - request->req_bytes_received;
|
||||||
|
|
||||||
LOG_PRINT(PTL_ELAN_DEBUG_ACK,
|
/* FIXME: save frag descriptor somewhere in the header */
|
||||||
"remote frag %p local req %p buffer %p size %d \n",
|
((mca_ptl_elan_ack_header_t *) hdr)->frag = desc;
|
||||||
hdr->hdr_ack.hdr_src_ptr.pval,
|
|
||||||
hdr->hdr_ack.hdr_dst_match.pval,
|
|
||||||
hdr->hdr_ack.hdr_dst_addr.pval,
|
|
||||||
hdr->hdr_ack.hdr_dst_size);
|
|
||||||
|
|
||||||
/* Filling up QDMA descriptor */
|
LOG_PRINT(PTL_ELAN_DEBUG_ACK, "desc %p hdr %p \n", desc, hdr);
|
||||||
qdma->main_dma.dma_srcAddr = elan4_main2elan(
|
|
||||||
elan_ptl->ptl_elan_ctx, &qdma->buff[0]);
|
|
||||||
|
|
||||||
/* XXX: Hardcoded DMA retry count */
|
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||||
|
/* XXX: Need to have a way to differentiate different frag */
|
||||||
|
qdma->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||||
|
qdma->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||||
|
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||||
|
qdma->comp_dma.dma_cookie = elan4_local_cookie(
|
||||||
|
elan_ptl->queue->tx_cpool,
|
||||||
|
E4_COOKIE_TYPE_LOCAL_DMA,
|
||||||
|
elan_ptl->elan_vp);
|
||||||
|
qdma->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *) hdr);
|
||||||
|
memcpy ((void *)qdma->comp_buff, (void *)&qdma->comp_dma,
|
||||||
|
sizeof (E4_DMA64));
|
||||||
|
|
||||||
|
/* XXX: Hardcoded DMA retry count
|
||||||
|
* Initialize some of the dma structures */
|
||||||
qdma->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
qdma->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||||
sizeof(mca_ptl_base_ack_header_t),
|
sizeof(mca_ptl_base_ack_header_t),
|
||||||
DMA_DataTypeByte, DMA_QueueWrite, 16);
|
DMA_DataTypeByte, DMA_QueueWrite, 16);
|
||||||
@ -932,6 +953,19 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
|||||||
qdma->main_dma.dma_cookie = elan4_local_cookie (
|
qdma->main_dma.dma_cookie = elan4_local_cookie (
|
||||||
elan_ptl->queue->tx_cpool,
|
elan_ptl->queue->tx_cpool,
|
||||||
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||||
|
qdma->main_dma.dma_srcAddr = elan4_main2elan(ctx, (void *) hdr);
|
||||||
|
qdma->main_dma.dma_srcEvent= elan4_main2elan(ctx, qdma->comp_event);
|
||||||
|
#else
|
||||||
|
/* Filling up QDMA descriptor */
|
||||||
|
qdma->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||||
|
sizeof(mca_ptl_base_ack_header_t),
|
||||||
|
DMA_DataTypeByte, DMA_QueueWrite, 16);
|
||||||
|
qdma->main_dma.dma_vproc = destvp;
|
||||||
|
qdma->main_dma.dma_cookie = elan4_local_cookie (
|
||||||
|
elan_ptl->queue->tx_cpool,
|
||||||
|
E4_COOKIE_TYPE_LOCAL_DMA, destvp);
|
||||||
|
qdma->main_dma.dma_srcAddr = elan4_main2elan(ctx, (void*)hdr);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Make main memory coherent with IO domain (IA64) */
|
/* Make main memory coherent with IO domain (IA64) */
|
||||||
MEMBAR_VISIBLE ();
|
MEMBAR_VISIBLE ();
|
||||||
@ -1068,7 +1102,14 @@ mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl)
|
|||||||
header->hdr_common.hdr_flags,
|
header->hdr_common.hdr_flags,
|
||||||
header->hdr_common.hdr_size);
|
header->hdr_common.hdr_size);
|
||||||
|
|
||||||
frag = (mca_ptl_elan_send_frag_t *)header->hdr_frag.hdr_src_ptr.pval;
|
/* FIXME: To handle other different types of headers
|
||||||
|
* and use a simplied way checking completion */
|
||||||
|
if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_ACK) {
|
||||||
|
frag = ((mca_ptl_elan_ack_header_t*)header)->frag;
|
||||||
|
} else {
|
||||||
|
frag = (mca_ptl_elan_send_frag_t *)
|
||||||
|
header->hdr_frag.hdr_src_ptr.pval;
|
||||||
|
}
|
||||||
basic = (ompi_ptl_elan_base_desc_t*)frag->desc;
|
basic = (ompi_ptl_elan_base_desc_t*)frag->desc;
|
||||||
|
|
||||||
/* XXX: please reset additional chained event for put/get desc */
|
/* XXX: please reset additional chained event for put/get desc */
|
||||||
|
@ -53,7 +53,7 @@
|
|||||||
#define PTL_ELAN_DEBUG_CHAIN (0x800)
|
#define PTL_ELAN_DEBUG_CHAIN (0x800)
|
||||||
|
|
||||||
#define PTL_ELAN_DEBUG_FLAG \
|
#define PTL_ELAN_DEBUG_FLAG \
|
||||||
(PTL_ELAN_DEBUG_MAC|PTL_ELAN_DEBUG_SEND|PTL_ELAN_DEBUG_RECV|PTL_ELAN_DEBUG_PUT|PTL_ELAN_DEBUG_GET)
|
(PTL_ELAN_DEBUG_PUT|PTL_ELAN_DEBUG_GET)
|
||||||
|
|
||||||
#define START_FUNC(flag) \
|
#define START_FUNC(flag) \
|
||||||
do { \
|
do { \
|
||||||
@ -93,11 +93,11 @@ do { \
|
|||||||
#define OMPI_PTL_ELAN_LOST_QSLOTS (1)
|
#define OMPI_PTL_ELAN_LOST_QSLOTS (1)
|
||||||
|
|
||||||
#define OMPI_PTL_ELAN_MAX_QDESCS (128)
|
#define OMPI_PTL_ELAN_MAX_QDESCS (128)
|
||||||
#define OMPI_PTL_ELAN_NUM_QDESCS (4)
|
#define OMPI_PTL_ELAN_NUM_QDESCS (2)
|
||||||
#define OMPI_PTL_ELAN_QDMA_RETRY (16)
|
#define OMPI_PTL_ELAN_QDMA_RETRY (16)
|
||||||
|
|
||||||
#define OMPI_PTL_ELAN_MAX_PUTGET (32)
|
#define OMPI_PTL_ELAN_MAX_PUTGET (32)
|
||||||
#define OMPI_PTL_ELAN_NUM_PUTGET (8)
|
#define OMPI_PTL_ELAN_NUM_PUTGET (1)
|
||||||
#define OMPI_PTL_ELAN_MAX_PGDESC (8)
|
#define OMPI_PTL_ELAN_MAX_PGDESC (8)
|
||||||
|
|
||||||
#define OMPI_PTL_ELAN_FASTPATH (0x1)
|
#define OMPI_PTL_ELAN_FASTPATH (0x1)
|
||||||
@ -120,7 +120,7 @@ do { \
|
|||||||
do { \
|
do { \
|
||||||
if (value == unexp) { \
|
if (value == unexp) { \
|
||||||
ompi_output(output, \
|
ompi_output(output, \
|
||||||
"[%s:%d] allocate received unexpect value \n", \
|
"[%s:%d] alloc received unexpect value \n", \
|
||||||
__FILE__, __LINE__); \
|
__FILE__, __LINE__); \
|
||||||
return errno; \
|
return errno; \
|
||||||
} \
|
} \
|
||||||
@ -260,7 +260,7 @@ struct ompi_ptl_elan_putget_desc_t {
|
|||||||
/* 8 byte aligned */
|
/* 8 byte aligned */
|
||||||
volatile E4_uint64 chain_doneWord;
|
volatile E4_uint64 chain_doneWord;
|
||||||
/* 8 byte aligned */
|
/* 8 byte aligned */
|
||||||
E4_Event32 *chain_event; /* E4_Event plus pad */
|
E4_Event *chain_event; /* E4_Event plus pad */
|
||||||
E4_Addr *chain_buff;
|
E4_Addr *chain_buff;
|
||||||
|
|
||||||
E4_Addr src_elan_addr;
|
E4_Addr src_elan_addr;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user