a) Get PTL_ELAN coding done
b) Testing and optimizing is the next This commit was SVN r1546.
Этот коммит содержится в:
родитель
d4e1d133ca
Коммит
9df5813c6b
@ -9,8 +9,6 @@ AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include \
|
||||
-I$(top_ompi_srcdir)/src -I$(top_ompi_srcdir)/src/include \
|
||||
-I/usr/lib/qsnet/elan4/include
|
||||
|
||||
#AM_LDFLAGS = -lelan -lelanctrl -lrms -lrmscall -lelan4
|
||||
|
||||
noinst_LTLIBRARIES = libmca_ptl_elan.la
|
||||
libmca_ptl_elan_la_SOURCES = \
|
||||
elan_config.h \
|
||||
@ -25,6 +23,7 @@ libmca_ptl_elan_la_SOURCES = \
|
||||
ptl_elan_proc.c \
|
||||
ptl_elan_module.c \
|
||||
ptl_elan_priv.c \
|
||||
ptl_elan_peer.c \
|
||||
ptl_elan_init.c \
|
||||
ptl_elan_comm_init.c \
|
||||
ptl_elan.c
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "ptl_elan_frag.h"
|
||||
#include "ptl_elan_priv.h"
|
||||
|
||||
|
||||
/* XXX: There must be multiple PTL's. This could be the template */
|
||||
mca_ptl_elan_t mca_ptl_elan = {
|
||||
{
|
||||
@ -45,11 +44,12 @@ mca_ptl_elan_t mca_ptl_elan = {
|
||||
}
|
||||
};
|
||||
|
||||
int mca_ptl_elan_add_procs (struct mca_ptl_t *ptl,
|
||||
int
|
||||
mca_ptl_elan_add_procs (struct mca_ptl_t *ptl,
|
||||
size_t nprocs,
|
||||
struct ompi_proc_t **procs,
|
||||
struct mca_ptl_base_peer_t **peers,
|
||||
ompi_bitmap_t* reachable)
|
||||
ompi_bitmap_t * reachable)
|
||||
{
|
||||
struct ompi_proc_t *ompi_proc;
|
||||
mca_ptl_elan_proc_t *ptl_proc;
|
||||
@ -58,59 +58,78 @@ int mca_ptl_elan_add_procs (struct mca_ptl_t *ptl,
|
||||
int rc;
|
||||
int i;
|
||||
|
||||
for(i=0; i<nprocs; i++) {
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
|
||||
ompi_proc = procs[i];
|
||||
ptl_proc = mca_ptl_elan_proc_create(ompi_proc);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(ptl_proc, NULL, OMPI_ERR_OUT_OF_RESOURCE, 0);
|
||||
ptl_proc = mca_ptl_elan_proc_create (ompi_proc);
|
||||
|
||||
if (NULL == ptl_proc) {
|
||||
ompi_output (0,
|
||||
"[%s:%d] could not create a ptl proc\n",
|
||||
__FILE__, __LINE__);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Check to make sure that the peer has at least as many
|
||||
* interface addresses exported as we are trying to use.
|
||||
* If not, then don't bind this PTL instance to the proc.
|
||||
*/
|
||||
OMPI_THREAD_LOCK(&ptl_proc->proc_lock);
|
||||
OMPI_THREAD_LOCK (&ptl_proc->proc_lock);
|
||||
|
||||
if(ptl_proc->proc_addr_count == ptl_proc->proc_peer_count) {
|
||||
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
|
||||
ompi_output(0, "all peers are taken already\n");
|
||||
if (ptl_proc->proc_addr_count == ptl_proc->proc_peer_count) {
|
||||
OMPI_THREAD_UNLOCK (&ptl_proc->proc_lock);
|
||||
ompi_output (0, "all peers are taken already\n");
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
|
||||
/* The ptl_proc datastructure is shared by all TCP PTL
|
||||
/* The ptl_proc datastructure is shared by all PTL
|
||||
* instances that are trying to reach this destination.
|
||||
* Cache the peer instance on the ptl_proc.
|
||||
*/
|
||||
ptl_peer = OBJ_NEW(mca_ptl_elan_peer_t);
|
||||
if(NULL == ptl_peer) {
|
||||
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
|
||||
|
||||
ptl_peer = OBJ_NEW (mca_ptl_elan_peer_t);
|
||||
if (NULL == ptl_peer) {
|
||||
OMPI_THREAD_UNLOCK (&ptl_proc->proc_lock);
|
||||
ompi_output (0, "[%s:%d] unabled to allocate ptl_peer \n",
|
||||
__FILE__, __LINE__);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
ptl_peer->peer_ptl = (mca_ptl_elan_t*)ptl;
|
||||
rc = mca_ptl_elan_proc_insert(ptl_proc, ptl_peer);
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
OBJ_RELEASE(ptl_peer);
|
||||
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
|
||||
ptl_peer->peer_ptl = (mca_ptl_elan_t *) ptl;
|
||||
|
||||
rc = mca_ptl_elan_proc_insert (ptl_proc, ptl_peer);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
OBJ_RELEASE (ptl_peer);
|
||||
OMPI_THREAD_UNLOCK (&ptl_proc->proc_lock);
|
||||
ompi_output (0, "[%s:%d] unabled to insert ptl_peer \n",
|
||||
__FILE__, __LINE__);
|
||||
return rc;
|
||||
}
|
||||
ompi_bitmap_set_bit(reachable, i);
|
||||
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
|
||||
peers[i] = ptl_peer;
|
||||
ompi_bitmap_set_bit (reachable, i);
|
||||
|
||||
OMPI_THREAD_UNLOCK (&ptl_proc->proc_lock);
|
||||
peers[i] = (struct mca_ptl_base_peer_t *) ptl_peer;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_ptl_elan_del_procs (struct mca_ptl_t *ptl,
|
||||
int
|
||||
mca_ptl_elan_del_procs (struct mca_ptl_t *ptl,
|
||||
size_t nprocs,
|
||||
struct ompi_proc_t ** procs,
|
||||
struct ompi_proc_t **procs,
|
||||
struct mca_ptl_base_peer_t **peers)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
OBJ_RELEASE (peers[i]);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_ptl_elan_finalize (struct mca_ptl_t *ptl)
|
||||
int
|
||||
mca_ptl_elan_finalize (struct mca_ptl_t *ptl)
|
||||
{
|
||||
int rail_index;
|
||||
struct mca_ptl_elan_t * elan_ptl ;
|
||||
struct mca_ptl_elan_t *elan_ptl;
|
||||
|
||||
elan_ptl = (struct mca_ptl_elan_t *) ptl;
|
||||
|
||||
@ -118,58 +137,57 @@ int mca_ptl_elan_finalize (struct mca_ptl_t *ptl)
|
||||
|
||||
/* Free the PTL */
|
||||
rail_index = elan_ptl->ptl_ni_local;
|
||||
free(elan_ptl);
|
||||
free (elan_ptl);
|
||||
|
||||
/* Record the missing of this entry */
|
||||
mca_ptl_elan_module.elan_ptls[rail_index] = NULL;
|
||||
mca_ptl_elan_module.elan_num_ptls -- ;
|
||||
mca_ptl_elan_module.elan_num_ptls--;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_ptl_elan_req_alloc (struct mca_ptl_t *ptl,
|
||||
int
|
||||
mca_ptl_elan_req_alloc (struct mca_ptl_t *ptl,
|
||||
struct mca_pml_base_send_request_t **request)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
mca_pml_base_send_request_t* sendreq;
|
||||
ompi_list_item_t* item;
|
||||
mca_pml_base_send_request_t *sendreq;
|
||||
ompi_list_item_t *item;
|
||||
|
||||
#if 0
|
||||
/* PTL_TCP allocate request from the module
|
||||
* But PTL_ELAN have to allocate from each PTL since
|
||||
* all the descriptors are bound to the PTL related command queue, etc
|
||||
* for their functions.
|
||||
*/
|
||||
OMPI_FREE_LIST_GET(&mca_ptl_elan_module.elan_send_requests, item, rc);
|
||||
/* FIXME, Error here, rc is passed in by value
|
||||
* Which will not bring any output from this allocation request */
|
||||
OMPI_FREE_LIST_GET (&mca_ptl_elan_module.elan_reqs_free, item, rc);
|
||||
|
||||
if(NULL != (sendreq = (mca_pml_base_send_request_t*)item))
|
||||
if (NULL != (sendreq = (mca_pml_base_send_request_t *) item))
|
||||
sendreq->req_owner = ptl;
|
||||
*request = sendreq;
|
||||
#endif
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
void mca_ptl_elan_req_return (struct mca_ptl_t *ptl,
|
||||
void
|
||||
mca_ptl_elan_req_return (struct mca_ptl_t *ptl,
|
||||
struct mca_pml_base_send_request_t *request)
|
||||
{
|
||||
/*OBJ_DESTRUCT(&request->req_convertor);*/
|
||||
#if 0
|
||||
OMPI_FREE_LIST_RETURN(&mca_ptl_elan_module.elan_send_requests,
|
||||
(ompi_list_item_t*)request);
|
||||
#endif
|
||||
OMPI_FREE_LIST_RETURN (&mca_ptl_elan_module.elan_reqs_free,
|
||||
(ompi_list_item_t *) request);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void mca_ptl_elan_recv_frag_return (struct mca_ptl_t *ptl,
|
||||
void
|
||||
mca_ptl_elan_recv_frag_return (struct mca_ptl_t *ptl,
|
||||
struct mca_ptl_elan_recv_frag_t *frag)
|
||||
{
|
||||
OMPI_FREE_LIST_RETURN(&mca_ptl_elan_module.elan_recv_frags_free,
|
||||
(ompi_list_item_t*)frag);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void mca_ptl_elan_send_frag_return (struct mca_ptl_t *ptl,
|
||||
void
|
||||
mca_ptl_elan_send_frag_return (struct mca_ptl_t *ptl,
|
||||
struct mca_ptl_elan_send_frag_t *frag)
|
||||
{
|
||||
return;
|
||||
@ -179,23 +197,57 @@ void mca_ptl_elan_send_frag_return (struct mca_ptl_t *ptl,
|
||||
* Initiate a put operation.
|
||||
*/
|
||||
|
||||
int mca_ptl_elan_put (struct mca_ptl_t* ptl,
|
||||
struct mca_ptl_base_peer_t* ptl_base_peer,
|
||||
struct mca_pml_base_send_request_t* request,
|
||||
int
|
||||
mca_ptl_elan_put (struct mca_ptl_t *ptl,
|
||||
struct mca_ptl_base_peer_t *ptl_peer,
|
||||
struct mca_pml_base_send_request_t *sendreq,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
int flags)
|
||||
{
|
||||
mca_ptl_elan_desc_item_t *sd;
|
||||
|
||||
if (size <= 0) {
|
||||
sendreq->super.req_mpi_done = true;
|
||||
sendreq->super.req_pml_done = true;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* XXX: fix pml_send?
|
||||
* Why presenting so many arguments while each of them is already
|
||||
* contained in the request descriptors,
|
||||
*
|
||||
* XXX:
|
||||
* PML extract an request from PTL module and then use this
|
||||
* a request to ask for a fragment
|
||||
* Is it too deep across stacks to get a request and
|
||||
* correspondingly multiple LOCKS to go through*/
|
||||
|
||||
sd = mca_ptl_elan_alloc_send_desc(sendreq);
|
||||
if (NULL == sd) {
|
||||
ompi_output(0,
|
||||
"[%s:%d] Unable to allocate an elan send descriptors \n",
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
|
||||
/* Update offset, in TCP case, this is a must.
|
||||
* XXX: Not sure how it is going to be here */
|
||||
sendreq->req_offset += size;
|
||||
((struct mca_ptl_elan_send_request_t *)sendreq)->req_frag = sd;
|
||||
|
||||
return mca_ptl_elan_start_desc(
|
||||
((struct mca_ptl_elan_send_request_t *)sendreq)->desc_type, sd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initiate a get.
|
||||
* Get routine. We need an interface that provides one more argument,
|
||||
* describing the source of the data.
|
||||
*/
|
||||
|
||||
int mca_ptl_elan_get (struct mca_ptl_t* ptl,
|
||||
struct mca_ptl_base_peer_t* ptl_base_peer,
|
||||
struct mca_pml_base_recv_request_t* request,
|
||||
int
|
||||
mca_ptl_elan_get (struct mca_ptl_t *ptl,
|
||||
struct mca_ptl_base_peer_t *ptl_base_peer,
|
||||
struct mca_pml_base_recv_request_t *request,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
int flags)
|
||||
@ -208,7 +260,8 @@ int mca_ptl_elan_get (struct mca_ptl_t* ptl,
|
||||
* ack back to the peer and process the fragment.
|
||||
*/
|
||||
|
||||
void mca_ptl_elan_matched (mca_ptl_t * ptl,
|
||||
void
|
||||
mca_ptl_elan_matched (mca_ptl_t * ptl,
|
||||
mca_ptl_base_recv_frag_t * frag)
|
||||
{
|
||||
return;
|
||||
|
@ -43,30 +43,7 @@ struct mca_ptl_elan_t {
|
||||
|
||||
struct ompi_ptl_elan_queue_ctrl_t *queue; /**< Queue control structures */
|
||||
|
||||
int elan_sten_size; /**< sten packet len */
|
||||
int elan_rdma_size; /**< qdma packet length */
|
||||
int elan_qdma_size; /**< qdma packet length */
|
||||
|
||||
int sten_total; /**< total sten descriptors */
|
||||
int rdma_total; /**< total rdma descriptors */
|
||||
int qdma_total; /**< total rdma descriptors */
|
||||
|
||||
int sten_num; /**< num of outstanding sten packets */
|
||||
int rdma_num; /**< num of outstanding rdma packets */
|
||||
int qdma_num; /**< num of outstanding rdma packets */
|
||||
|
||||
int max_num_dmas; /**< total rdma descriptors */
|
||||
|
||||
ompi_list_t elan_stens; /**< used elan sten descriptors*/
|
||||
ompi_list_t elan_dmas; /**< used elan dma descriptors*/
|
||||
ompi_list_t elan_rdmas; /**< used elan rdma descriptors */
|
||||
ompi_list_t elan_frags; /**< used elan fragments */
|
||||
|
||||
ompi_free_list_t elan_dmas_free; /**< free elan dma descriptors*/
|
||||
ompi_free_list_t elan_stens_free; /**< free elan sten descriptors*/
|
||||
ompi_free_list_t elan_rdmas_free; /**< free elan rdma descriptors */
|
||||
ompi_free_list_t elan_frags_free; /**< free elan rdma fragments */
|
||||
|
||||
};
|
||||
typedef struct mca_ptl_elan_t mca_ptl_elan_t;
|
||||
extern mca_ptl_elan_t mca_ptl_elan;
|
||||
@ -91,17 +68,13 @@ struct mca_ptl_elan_module_1_0_0_t {
|
||||
struct mca_ptl_elan_t **elan_ptls; /**< array of available PTLs */
|
||||
size_t elan_num_ptls; /**< number of ptls activated */
|
||||
|
||||
ompi_list_t elan_reqs; /**< all elan requests */
|
||||
ompi_list_t elan_prog_events; /**< events in progress */
|
||||
ompi_list_t elan_comp_events; /**< events completed, but to reclaim */
|
||||
ompi_list_t elan_procs; /**< elan proc's */
|
||||
ompi_list_t elan_reqs; /**< all elan requests */
|
||||
ompi_list_t elan_recv_frags;
|
||||
ompi_list_t elan_pending_acks;
|
||||
|
||||
ompi_free_list_t elan_events_free;/**< free events */
|
||||
ompi_free_list_t elan_reqs_free; /**< all elan requests */
|
||||
|
||||
ompi_event_t elan_send_event; /**< event structure for sends */
|
||||
ompi_event_t elan_recv_event; /**< event structure for recvs */
|
||||
ompi_free_list_t elan_recv_frags_free;
|
||||
|
||||
struct mca_ptl_elan_proc_t *elan_local;
|
||||
|
||||
|
@ -16,19 +16,21 @@
|
||||
#define ALIGNUP(x,a) (((unsigned int)(x) + ((a)-1)) & (-(a)))
|
||||
|
||||
static int
|
||||
ompi_init_elan_queue_events (mca_ptl_elan_t *ptl,
|
||||
ompi_ptl_elan_queue_ctrl_t *queue)
|
||||
ompi_init_elan_queue_events (mca_ptl_elan_t * ptl,
|
||||
ompi_ptl_elan_queue_ctrl_t * queue)
|
||||
{
|
||||
int i;
|
||||
int count;
|
||||
int main_align, main_size;
|
||||
int elan_align, elan_size;
|
||||
|
||||
mca_ptl_elan_desc_item_t *desc;
|
||||
|
||||
RAIL *rail;
|
||||
ELAN4_CTX *ctx;
|
||||
|
||||
ompi_free_list_t *flist;
|
||||
ompi_ptl_elan_queue_send_t *ptr;
|
||||
ompi_ptl_elan_qdma_desc_t *ptr;
|
||||
ompi_elan_event_t *elan_ptr;
|
||||
|
||||
rail = (RAIL *) ptl->ptl_elan_rail;
|
||||
@ -40,13 +42,12 @@ ompi_init_elan_queue_events (mca_ptl_elan_t *ptl,
|
||||
OBJ_CONSTRUCT (&queue->tx_desc, ompi_list_t);
|
||||
OBJ_CONSTRUCT (&queue->tx_desc_free, ompi_free_list_t);
|
||||
|
||||
main_align = MAX(sizeof(void *), 8);
|
||||
elan_align = MAX(sizeof(int *), 128);
|
||||
main_size = ALIGNUP(sizeof (ompi_ptl_elan_queue_send_t), main_align);
|
||||
elan_size = ALIGNUP(sizeof (ompi_ptl_elan_queue_send_t), elan_align);
|
||||
main_align = MAX (sizeof (void *), 8);
|
||||
elan_align = MAX (sizeof (int *), 128);
|
||||
main_size = ALIGNUP (sizeof (ompi_ptl_elan_qdma_desc_t), main_align);
|
||||
elan_size = ALIGNUP (sizeof (ompi_elan_event_t), elan_align);
|
||||
|
||||
flist->fl_elem_size =
|
||||
flist->fl_max_to_alloc = 128;
|
||||
flist->fl_elem_size = flist->fl_max_to_alloc = 128;
|
||||
flist->fl_num_allocated = 0;
|
||||
flist->fl_num_per_alloc = count = 16;
|
||||
flist->fl_elem_class = NULL; /* leave it null */
|
||||
@ -54,38 +55,49 @@ ompi_init_elan_queue_events (mca_ptl_elan_t *ptl,
|
||||
|
||||
/* Allocate the elements */
|
||||
|
||||
ptr = (ompi_ptl_elan_queue_send_t*) elan4_allocMain(rail->r_alloc,
|
||||
main_align, main_size*(count + 1));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(ptr, NULL, OMPI_ERROR, 0);
|
||||
desc = (mca_ptl_elan_desc_item_t *)
|
||||
malloc(sizeof(mca_ptl_elan_desc_item_t) * (count + 1));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (desc, NULL, OMPI_ERROR, 0);
|
||||
|
||||
ptr = (ompi_ptl_elan_qdma_desc_t *) elan4_allocMain (rail->r_alloc,
|
||||
main_align,
|
||||
main_size *
|
||||
(count + 1));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (ptr, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/* Allocating elan related structures */
|
||||
elan_ptr = (ompi_elan_event_t *) elan4_allocElan(rail->r_alloc,
|
||||
elan_align, elan_size * (count + 1));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(elan_ptr, NULL, OMPI_ERROR, 0);
|
||||
elan_ptr = (ompi_elan_event_t *) elan4_allocElan (rail->r_alloc,
|
||||
elan_align,
|
||||
elan_size * (count +
|
||||
1));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0);
|
||||
|
||||
for (i = 0; i < flist->fl_num_per_alloc; i++) {
|
||||
ompi_list_item_t *item;
|
||||
|
||||
for(i=0; i< flist->fl_num_per_alloc; i++) {
|
||||
ompi_list_item_t* item;
|
||||
ptr->rail = rail;
|
||||
ptr->elan_data_event = elan_ptr;
|
||||
|
||||
item = (ompi_list_item_t*)ptr;
|
||||
ompi_list_append(&flist->super, item);
|
||||
desc->item = (mca_ptl_elan_send_desc_t)ptr;
|
||||
|
||||
/* Initialize some of the dma structures */
|
||||
{
|
||||
ptr->main_dma.dma_dstAddr = 0;
|
||||
ptr->main_dma.dma_srcEvent = SDRAM2ELAN(ctx, &elan_ptr->event32);
|
||||
ptr->main_dma.dma_dstEvent = SDRAM2ELAN(ctx, queue->input);
|
||||
INITEVENT_WORD(ctx, (EVENT *)&elan_ptr->event32,
|
||||
ptr->main_dma.dma_srcEvent =
|
||||
SDRAM2ELAN (ctx, &elan_ptr->event32);
|
||||
ptr->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input);
|
||||
INITEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32,
|
||||
&ptr->main_doneWord);
|
||||
RESETEVENT_WORD(&ptr->main_doneWord);
|
||||
PRIMEEVENT_WORD(ctx, (EVENT *)&elan_ptr->event32, 1);
|
||||
|
||||
RESETEVENT_WORD (&ptr->main_doneWord);
|
||||
PRIMEEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32, 1);
|
||||
}
|
||||
|
||||
item = (ompi_list_item_t *) desc;
|
||||
ompi_list_append (&flist->super, item);
|
||||
|
||||
/* Progress to the next element */
|
||||
ptr = (ompi_ptl_elan_queue_send_t*) ((char *) ptr + main_size);
|
||||
ptr = (ompi_ptl_elan_qdma_desc_t *) ((char *) ptr + main_size);
|
||||
elan_ptr = (ompi_elan_event_t *) ((char *) elan_ptr + elan_size);
|
||||
desc ++;
|
||||
}
|
||||
flist->fl_num_allocated += flist->fl_num_per_alloc;
|
||||
|
||||
@ -93,24 +105,26 @@ ompi_init_elan_queue_events (mca_ptl_elan_t *ptl,
|
||||
}
|
||||
|
||||
int
|
||||
ompi_init_elan_stat (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
ompi_init_elan_stat (mca_ptl_elan_module_1_0_0_t * emp,
|
||||
int num_rails)
|
||||
{
|
||||
return (OMPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t * emp,
|
||||
int num_rails)
|
||||
{
|
||||
int i;
|
||||
int nslots = 128;
|
||||
int slotsize = 2048;
|
||||
RAIL *rail;
|
||||
ELAN4_CTX *ctx;
|
||||
struct mca_ptl_elan_t * ptl;
|
||||
struct mca_ptl_elan_t *ptl;
|
||||
|
||||
/* Init the Transmit Queue structure */
|
||||
for ( i = 0 ; i < num_rails; i++ ) {
|
||||
for (i = 0; i < num_rails; i++) {
|
||||
|
||||
ompi_ptl_elan_recv_queue_t *rxq;
|
||||
ompi_ptl_elan_queue_ctrl_t *queue;
|
||||
@ -120,21 +134,23 @@ ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;
|
||||
|
||||
queue = ptl->queue = (ompi_ptl_elan_queue_ctrl_t *)
|
||||
malloc(sizeof(ompi_ptl_elan_queue_ctrl_t));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(queue, NULL, OMPI_ERROR, 0);
|
||||
memset(queue, 0, sizeof(ompi_ptl_elan_queue_ctrl_t));
|
||||
malloc (sizeof (ompi_ptl_elan_queue_ctrl_t));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (queue, NULL, OMPI_ERROR, 0);
|
||||
memset (queue, 0, sizeof (ompi_ptl_elan_queue_ctrl_t));
|
||||
|
||||
/* Allocate input queue */
|
||||
queue->input = (E4_InputQueue *) elan4_allocElan(rail->r_alloc,
|
||||
INPUT_QUEUE_ALIGN, INPUT_QUEUE_SIZE);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(queue->input, NULL, OMPI_ERROR, 0);
|
||||
queue->input = (E4_InputQueue *) elan4_allocElan (rail->r_alloc,
|
||||
INPUT_QUEUE_ALIGN,
|
||||
INPUT_QUEUE_SIZE);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (queue->input, NULL, OMPI_ERROR, 0);
|
||||
|
||||
queue->tx_cmdq = elan4_alloc_cmdq (ctx,
|
||||
rail->r_alloc,
|
||||
CQ_Size8K,
|
||||
CQ_WriteEnableBit | CQ_DmaStartEnableBit | CQ_STENEnableBit,
|
||||
NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(queue->tx_cmdq, NULL, OMPI_ERROR, 0);
|
||||
CQ_WriteEnableBit |
|
||||
CQ_DmaStartEnableBit |
|
||||
CQ_STENEnableBit, NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (queue->tx_cmdq, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/*
|
||||
* Elan4 has a rather complicated hierarchical event mechanism.
|
||||
@ -145,10 +161,10 @@ ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
*
|
||||
* Initialize a new event list managing this queue */
|
||||
|
||||
ompi_init_elan_queue_events(ptl, queue);
|
||||
ompi_init_elan_queue_events (ptl, queue);
|
||||
|
||||
/* Allocate a cookie pool */
|
||||
queue->tx_cpool = elan4_allocCookiePool(ctx, ptl->elan_vp);
|
||||
queue->tx_cpool = elan4_allocCookiePool (ctx, ptl->elan_vp);
|
||||
|
||||
/* Init the Receive Queue structure */
|
||||
queue->rx_nslots = 128;
|
||||
@ -156,56 +172,59 @@ ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
|
||||
queue->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ?
|
||||
INPUT_QUEUE_MAX : slotsize;
|
||||
queue->rx_slotsize = ELAN_ALIGNUP(slotsize, SLOT_ALIGN);
|
||||
queue->rx_slotsize = ELAN_ALIGNUP (slotsize, SLOT_ALIGN);
|
||||
|
||||
rxq = queue->rxq = (ompi_ptl_elan_recv_queue_t *)
|
||||
elan4_allocMain(rail->r_alloc, 64,
|
||||
sizeof(ompi_ptl_elan_recv_queue_t));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rxq, NULL, OMPI_ERROR, 0);
|
||||
memset(rxq,0,sizeof(ompi_ptl_elan_recv_queue_t));
|
||||
elan4_allocMain (rail->r_alloc, 64,
|
||||
sizeof (ompi_ptl_elan_recv_queue_t));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rxq, NULL, OMPI_ERROR, 0);
|
||||
memset (rxq, 0, sizeof (ompi_ptl_elan_recv_queue_t));
|
||||
|
||||
rxq->qr_rail = rail;
|
||||
rxq->qr_fptr = elan4_allocMain(rail->r_alloc,
|
||||
rxq->qr_fptr = elan4_allocMain (rail->r_alloc,
|
||||
128, nslots * queue->rx_slotsize);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_fptr, NULL, OMPI_ERROR, 0);
|
||||
memset(rxq->qr_fptr, 0xeb, nslots * queue->rx_slotsize);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_fptr, NULL, OMPI_ERROR, 0);
|
||||
memset (rxq->qr_fptr, 0xeb, nslots * queue->rx_slotsize);
|
||||
|
||||
rxq->qr_elanDone = ALLOC_ELAN(rail, SLOT_ALIGN, sizeof(EVENT32));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_elanDone, NULL, OMPI_ERROR, 0);
|
||||
rxq->qr_elanDone = ALLOC_ELAN (rail, SLOT_ALIGN, sizeof (EVENT32));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_elanDone, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/* Set the top et al */
|
||||
rxq->qr_efitem = (E4_uint64)elan4_main2elan(ctx, rxq->qr_fptr);
|
||||
rxq->qr_efitem = (E4_uint64) elan4_main2elan (ctx, rxq->qr_fptr);
|
||||
rxq->qr_base = rxq->qr_fptr;
|
||||
rxq->qr_top = (void *)((uintptr_t)rxq->qr_base
|
||||
+ (queue->rx_slotsize * (nslots-1)));
|
||||
rxq->qr_top = (void *) ((uintptr_t) rxq->qr_base
|
||||
+ (queue->rx_slotsize * (nslots - 1)));
|
||||
rxq->qr_efptr = rxq->qr_efitem;
|
||||
rxq->qr_elitem = rxq->qr_efitem + (queue->rx_slotsize * (nslots-1));
|
||||
rxq->qr_elitem =
|
||||
rxq->qr_efitem + (queue->rx_slotsize * (nslots - 1));
|
||||
|
||||
/* Event to wait/block on */
|
||||
rxq->qr_qEvent = &rxq->qr_elanDone;
|
||||
|
||||
queue->input->q_event = SDRAM2ELAN(ctx, (void *)&rxq->qr_elanDone);
|
||||
queue->input->q_event =
|
||||
SDRAM2ELAN (ctx, (void *) &rxq->qr_elanDone);
|
||||
queue->input->q_fptr = rxq->qr_efitem;
|
||||
queue->input->q_bptr = rxq->qr_efitem;
|
||||
queue->input->q_control = E4_InputQueueControl(
|
||||
rxq->qr_efitem, rxq->qr_elitem, queue->rx_slotsize);
|
||||
queue->input->q_control =
|
||||
E4_InputQueueControl (rxq->qr_efitem, rxq->qr_elitem,
|
||||
queue->rx_slotsize);
|
||||
|
||||
/* The event */
|
||||
INITEVENT_WORD(ctx, (EVENT *)&rxq->qr_elanDone,
|
||||
INITEVENT_WORD (ctx, (EVENT *) & rxq->qr_elanDone,
|
||||
&rxq->qr_doneWord);
|
||||
RESETEVENT_WORD(&rxq->qr_doneWord);
|
||||
PRIMEEVENT_WORD(ctx, (EVENT *)&rxq->qr_elanDone, 1);
|
||||
RESETEVENT_WORD (&rxq->qr_doneWord);
|
||||
PRIMEEVENT_WORD (ctx, (EVENT *) & rxq->qr_elanDone, 1);
|
||||
|
||||
rxq->qr_cmdq = elan4_alloc_cmdq(
|
||||
ctx, rail->r_alloc,
|
||||
CQ_Size1K, CQ_WriteEnableBit | CQ_WaitEventEnableBit,
|
||||
NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_cmdq, NULL, OMPI_ERROR, 0);
|
||||
rxq->qr_cmdq = elan4_alloc_cmdq (ctx, rail->r_alloc,
|
||||
CQ_Size1K,
|
||||
CQ_WriteEnableBit |
|
||||
CQ_WaitEventEnableBit, NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_cmdq, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/* Allocate a sleepDesc for threads to block on */
|
||||
rxq->qr_es = ompi_init_elan_sleepdesc(&mca_ptl_elan_global_state,
|
||||
rxq->qr_es = ompi_init_elan_sleepdesc (&mca_ptl_elan_global_state,
|
||||
rxq->qr_rail);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_es, NULL, OMPI_ERROR, 0);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_es, NULL, OMPI_ERROR, 0);
|
||||
|
||||
OBJ_CONSTRUCT (&queue->rx_lock, ompi_mutex_t);
|
||||
}
|
||||
@ -213,12 +232,9 @@ ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
return (OMPI_SUCCESS);
|
||||
}
|
||||
|
||||
int ompi_init_elan_rdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
{
|
||||
return (OMPI_SUCCESS);
|
||||
}
|
||||
|
||||
int ompi_init_elan_sten (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
int
|
||||
ompi_init_elan_rdma (mca_ptl_elan_module_1_0_0_t * emp,
|
||||
int num_rails)
|
||||
{
|
||||
return (OMPI_SUCCESS);
|
||||
}
|
||||
|
@ -12,163 +12,20 @@
|
||||
#include "ptl_elan_peer.h"
|
||||
#include "ptl_elan_proc.h"
|
||||
#include "ptl_elan_frag.h"
|
||||
#include "ptl_elan_req.h"
|
||||
#include "ptl_elan_priv.h"
|
||||
|
||||
#define frag_header super.super.frag_header
|
||||
#define frag_owner super.super.frag_owner
|
||||
#define frag_peer super.super.frag_peer
|
||||
#define frag_convertor super.super.frag_convertor
|
||||
#if 0
|
||||
static void
|
||||
mca_ptl_elan_send_frag_construct (mca_ptl_elan_send_frag_t * frag)
|
||||
{
|
||||
|
||||
|
||||
static void mca_ptl_elan_send_frag_construct (mca_ptl_elan_send_frag_t *
|
||||
frag);
|
||||
static void mca_ptl_elan_send_frag_destruct (mca_ptl_elan_send_frag_t *
|
||||
frag);
|
||||
|
||||
static void mca_ptl_elan_recv_frag_construct (mca_ptl_elan_recv_frag_t *
|
||||
frag);
|
||||
static void mca_ptl_elan_recv_frag_destruct (mca_ptl_elan_recv_frag_t *
|
||||
frag);
|
||||
static bool mca_ptl_elan_recv_frag_header (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd,
|
||||
size_t);
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_ack (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd);
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_frag (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd);
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_match (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd);
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_data (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd);
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_discard (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd);
|
||||
|
||||
|
||||
ompi_class_t mca_ptl_elan_recv_frag_t_class = {
|
||||
"mca_ptl_elan_recv_frag_t",
|
||||
OBJ_CLASS (mca_ptl_base_recv_frag_t),
|
||||
(ompi_construct_t) mca_ptl_elan_recv_frag_construct,
|
||||
(ompi_destruct_t) mca_ptl_elan_recv_frag_destruct
|
||||
};
|
||||
|
||||
/*
|
||||
* ELAN fragment constructor
|
||||
*/
|
||||
}
|
||||
|
||||
static void
|
||||
mca_ptl_elan_recv_frag_construct (mca_ptl_elan_recv_frag_t * frag)
|
||||
mca_ptl_elan_send_frag_destruct (mca_ptl_elan_send_frag_t * frag)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ELAN fragment destructor
|
||||
*/
|
||||
|
||||
static void
|
||||
mca_ptl_elan_recv_frag_destruct (mca_ptl_elan_recv_frag_t * frag)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Initialize a ELAN receive fragment for a specific peer.
|
||||
*/
|
||||
|
||||
void
|
||||
mca_ptl_elan_recv_frag_init (mca_ptl_elan_recv_frag_t * frag,
|
||||
mca_ptl_elan_peer_t * peer)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback from event library when socket has data available
|
||||
* for receive.
|
||||
*/
|
||||
|
||||
bool
|
||||
mca_ptl_elan_recv_frag_handler (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Receive fragment header
|
||||
*/
|
||||
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_header (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd,
|
||||
size_t size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Receive and process an ack.
|
||||
*/
|
||||
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_ack (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Receive and process a match request - first fragment.
|
||||
*/
|
||||
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_match (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Receive and process 2nd+ fragments of a multi-fragment message.
|
||||
*/
|
||||
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_frag (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Continue with non-blocking recv() calls until the entire
|
||||
* fragment is received.
|
||||
*/
|
||||
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_data (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* If the app posted a receive buffer smaller than the
|
||||
* fragment, receive and discard remaining bytes.
|
||||
*/
|
||||
|
||||
static bool
|
||||
mca_ptl_elan_recv_frag_discard (mca_ptl_elan_recv_frag_t * frag,
|
||||
int sd)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
ompi_class_t mca_ptl_elan_send_frag_t_class = {
|
||||
@ -177,48 +34,112 @@ ompi_class_t mca_ptl_elan_send_frag_t_class = {
|
||||
(ompi_construct_t) mca_ptl_elan_send_frag_construct,
|
||||
(ompi_destruct_t) mca_ptl_elan_send_frag_destruct
|
||||
};
|
||||
|
||||
/*
|
||||
* Placeholders for send fragment constructor/destructors.
|
||||
*/
|
||||
#endif
|
||||
|
||||
static void
|
||||
mca_ptl_elan_send_frag_construct (mca_ptl_elan_send_frag_t * frag)
|
||||
mca_ptl_elan_recv_frag_construct (mca_ptl_elan_recv_frag_t * frag)
|
||||
{
|
||||
OBJ_CONSTRUCT (frag, mca_ptl_elan_recv_frag_t);
|
||||
frag->frag_hdr_cnt = 0;
|
||||
frag->frag_msg_cnt = 0;
|
||||
frag->frag.qdma = NULL;
|
||||
frag->alloc_buff = (char *) malloc (sizeof (char) * 2048 + 32);
|
||||
if (NULL == frag->alloc_buff) {
|
||||
ompi_output (0,
|
||||
"[%s:%d] Fatal error, unable to allocate recv buff \n",
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
frag->unex_buff = (char *) (((int) frag->alloc_buff + 32) >> 5 << 5);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
mca_ptl_elan_send_frag_destruct (mca_ptl_elan_send_frag_t * frag)
|
||||
mca_ptl_elan_recv_frag_destruct (mca_ptl_elan_recv_frag_t * frag)
|
||||
{
|
||||
/* Does this destruct free the memory? since OBJ_DESTRUCT,
|
||||
* works only for non-dynamically allocated objects */
|
||||
frag->frag_hdr_cnt = 0;
|
||||
frag->frag_msg_cnt = 0;
|
||||
frag->frag.qdma = NULL;
|
||||
free (frag->alloc_buff);
|
||||
frag->alloc_buff = NULL;
|
||||
frag->unex_buff = NULL;
|
||||
OBJ_DESTRUCT (frag);
|
||||
}
|
||||
|
||||
ompi_class_t mca_ptl_elan_recv_frag_t_class = {
|
||||
"mca_ptl_elan_recv_frag_t",
|
||||
OBJ_CLASS (mca_ptl_base_recv_frag_t),
|
||||
(ompi_construct_t) mca_ptl_elan_recv_frag_construct,
|
||||
(ompi_destruct_t) mca_ptl_elan_recv_frag_destruct
|
||||
};
|
||||
|
||||
/*
|
||||
* Initialize the fragment based on the current offset into the users
|
||||
* data buffer, and the indicated size.
|
||||
extern mca_ptl_elan_state_t mca_ptl_elan_global_state;
|
||||
|
||||
mca_ptl_elan_desc_item_t *
|
||||
mca_ptl_elan_alloc_send_desc (struct mca_pml_base_send_request_t *req)
|
||||
{
|
||||
struct ompi_ptl_elan_queue_ctrl_t *queue;
|
||||
mca_ptl_elan_t *ptl;
|
||||
struct mca_ptl_elan_peer_t *peer;
|
||||
size_t offset;
|
||||
size_t size;
|
||||
|
||||
ompi_free_list_t *flist;
|
||||
ompi_list_item_t *item;
|
||||
mca_ptl_elan_desc_item_t *desc;
|
||||
|
||||
ptl = (mca_ptl_elan_t *) req->req_owner;
|
||||
peer = (mca_ptl_elan_peer_t *) req->req_peer;
|
||||
offset = req->req_offset;
|
||||
size = ptl->super.ptl_first_frag_size;
|
||||
|
||||
/* For now, bind to queue DMA directly */
|
||||
{
|
||||
queue = ptl->queue;
|
||||
flist = &queue->tx_desc_free;
|
||||
|
||||
if (ompi_using_threads ()) {
|
||||
|
||||
ompi_mutex_lock (&((flist)->fl_lock));
|
||||
item = ompi_list_remove_first (&((flist)->super));
|
||||
|
||||
/* Progress this PTL module to get back a descriptor,
|
||||
* Is it OK to progress with ptl->ptl_send_progress? */
|
||||
while (NULL == item) {
|
||||
mca_ptl_tstamp_t tstamp = 0;
|
||||
|
||||
ptl->super.ptl_module->ptlm_progress (tstamp);
|
||||
item = ompi_list_remove_first (&((flist)->super));
|
||||
}
|
||||
ompi_mutex_unlock (&((flist)->fl_lock));
|
||||
} else {
|
||||
item = ompi_list_remove_first (&((flist)->super));
|
||||
|
||||
/* Progress this PTL module to get back a descriptor,
|
||||
* Is it OK to progress with ptl->ptl_send_progress? */
|
||||
while (NULL == item) {
|
||||
mca_ptl_tstamp_t tstamp = 0;
|
||||
|
||||
/*
|
||||
* Well, this still does not trigger the progress on
|
||||
* PTL's from other modules. Wait for PML to change.
|
||||
* Otherwise have to trigger PML progress from PTL. Ouch..
|
||||
*/
|
||||
ptl->super.ptl_module->ptlm_progress (tstamp);
|
||||
item = ompi_list_remove_first (&((flist)->super));
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
mca_ptl_elan_send_frag_init (mca_ptl_elan_send_frag_t * sendfrag,
|
||||
mca_ptl_elan_peer_t * ptl_peer,
|
||||
mca_pml_base_send_request_t * sendreq,
|
||||
size_t offset,
|
||||
size_t * size,
|
||||
int flags)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
((struct mca_ptl_elan_send_request_t *) req)->desc_type
|
||||
= MCA_PTL_ELAN_QDMA_DESC;
|
||||
desc = (mca_ptl_elan_desc_item_t *) item;
|
||||
}
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* The socket is setup as non-blocking, writes are handled asynchronously,
|
||||
* with event callbacks when the socket is ready for writes.
|
||||
*/
|
||||
|
||||
bool
|
||||
mca_ptl_elan_send_frag_handler (mca_ptl_elan_send_frag_t * frag,
|
||||
int sd)
|
||||
mca_ptl_elan_recv_frag_t *
|
||||
mca_ptl_elan_alloc_recv_desc (struct mca_pml_base_recv_request_t * req)
|
||||
{
|
||||
return true;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -17,96 +17,63 @@
|
||||
#include "mca/ptl/base/ptl_base_recvfrag.h"
|
||||
#include "ptl_elan.h"
|
||||
|
||||
extern ompi_class_t mca_ptl_elan_send_frag_t_class;
|
||||
extern ompi_class_t mca_ptl_elan_recv_frag_t_class;
|
||||
|
||||
struct mca_ptl_elan_peer_t;
|
||||
struct ompi_ptl_elan_qdma_frag_t;
|
||||
struct ompi_ptl_elan_putget_frag_t;
|
||||
|
||||
/**
|
||||
* ELAN send fragment derived type.
|
||||
* ELAN descriptor for send
|
||||
*/
|
||||
struct mca_ptl_elan_send_frag_t {
|
||||
mca_ptl_base_send_frag_t super; /**< base send fragment descriptor */
|
||||
size_t frag_vec_cnt;
|
||||
volatile int frag_progressed;
|
||||
union mca_ptl_elan_send_desc_t {
|
||||
struct ompi_ptl_elan_qdma_desc_t *qdma;
|
||||
struct ompi_ptl_elan_putget_desc_t *putget;
|
||||
};
|
||||
typedef struct mca_ptl_elan_send_frag_t mca_ptl_elan_send_frag_t;
|
||||
typedef union mca_ptl_elan_send_desc_t mca_ptl_elan_send_desc_t;
|
||||
|
||||
|
||||
#define MCA_PTL_ELAN_SEND_FRAG_ALLOC(item, rc) \
|
||||
OMPI_FREE_LIST_GET(&mca_ptl_elan_module.elan_send_frags, item, rc);
|
||||
|
||||
|
||||
#define MCA_PTL_ELAN_RECV_FRAG_ALLOC(frag, rc) \
|
||||
{ \
|
||||
ompi_list_item_t* item; \
|
||||
OMPI_FREE_LIST_GET(&mca_ptl_elan_module.elan_recv_frags, item, rc); \
|
||||
frag = (mca_ptl_elan_recv_frag_t*)item; \
|
||||
}
|
||||
|
||||
bool
|
||||
mca_ptl_elan_send_frag_handler (mca_ptl_elan_send_frag_t *, int sd);
|
||||
|
||||
|
||||
int
|
||||
mca_ptl_elan_send_frag_init (mca_ptl_elan_send_frag_t *,
|
||||
struct mca_ptl_elan_peer_t *,
|
||||
struct mca_pml_base_send_request_t *,
|
||||
size_t offset, size_t * size, int flags);
|
||||
|
||||
extern ompi_class_t mca_ptl_elan_recv_frag_t_class;
|
||||
struct mca_ptl_elan_desc_item_t {
|
||||
ompi_list_item_t super;
|
||||
mca_ptl_elan_send_desc_t item;
|
||||
};
|
||||
typedef struct mca_ptl_elan_desc_item_t mca_ptl_elan_desc_item_t;
|
||||
|
||||
/**
|
||||
* ELAN received fragment derived type.
|
||||
*/
|
||||
struct mca_ptl_elan_recv_frag_t {
|
||||
mca_ptl_base_recv_frag_t super; /**< base receive fragment descriptor */
|
||||
size_t frag_hdr_cnt; /**< number of header bytes received */
|
||||
size_t frag_msg_cnt; /**< number of message bytes received */
|
||||
bool frag_ack_pending; /**< an ack pending for this fragment */
|
||||
volatile int frag_progressed; /**< flag to atomically progress */
|
||||
mca_ptl_base_recv_frag_t super;
|
||||
size_t frag_hdr_cnt;
|
||||
size_t frag_msg_cnt;
|
||||
union {
|
||||
struct ompi_ptl_elan_qdma_frag_t *qdma;
|
||||
struct ompi_ptl_elan_putget_frag_t *putget;
|
||||
} frag;
|
||||
char *alloc_buff;
|
||||
char *unex_buff;
|
||||
};
|
||||
typedef struct mca_ptl_elan_recv_frag_t mca_ptl_elan_recv_frag_t;
|
||||
|
||||
bool
|
||||
mca_ptl_elan_recv_frag_handler (mca_ptl_elan_recv_frag_t *, int sd);
|
||||
mca_ptl_elan_desc_item_t *
|
||||
mca_ptl_elan_alloc_send_desc(struct mca_pml_base_send_request_t *req);
|
||||
|
||||
void
|
||||
mca_ptl_elan_recv_frag_init (mca_ptl_elan_recv_frag_t * frag,
|
||||
struct mca_ptl_elan_peer_t *peer);
|
||||
|
||||
bool
|
||||
mca_ptl_elan_recv_frag_send_ack (mca_ptl_elan_recv_frag_t * frag);
|
||||
|
||||
/*
|
||||
* For fragments that require an acknowledgment, this routine will be called
|
||||
* twice, once when the send completes, and again when the acknowledgment is
|
||||
* returned. Only the last caller should update the request status, so we
|
||||
* add a lock w/ the frag_progressed flag.
|
||||
*/
|
||||
static inline void
|
||||
mca_ptl_elan_send_frag_progress (mca_ptl_elan_send_frag_t * frag)
|
||||
{
|
||||
return;
|
||||
}
|
||||
mca_ptl_elan_recv_frag_t *
|
||||
mca_ptl_elan_alloc_recv_desc(struct mca_pml_base_recv_request_t *req);
|
||||
|
||||
static inline void
|
||||
mca_ptl_elan_send_frag_init_ack (mca_ptl_elan_send_frag_t * ack,
|
||||
struct mca_ptl_t *ptl,
|
||||
struct mca_ptl_elan_peer_t *ptl_peer,
|
||||
mca_ptl_elan_recv_frag_t * frag)
|
||||
mca_ptl_elan_recv_frag_progress(mca_ptl_elan_recv_frag_t* frag)
|
||||
{
|
||||
return;
|
||||
/* make sure this only happens once for threaded case */
|
||||
mca_pml_base_recv_request_t* request;
|
||||
mca_ptl_base_recv_progress_fn_t progress;
|
||||
|
||||
progress = (frag)->super.super.frag_owner->ptl_recv_progress;
|
||||
request = (frag)->super.frag_request;
|
||||
|
||||
/* progress the request */
|
||||
progress((frag)->super.super.frag_owner, request, &(frag)->super);
|
||||
mca_ptl_elan_recv_frag_return((frag)->super.super.frag_owner, (frag));
|
||||
}
|
||||
|
||||
static inline void
|
||||
mca_ptl_elan_recv_frag_matched (mca_ptl_elan_recv_frag_t * frag)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static inline void
|
||||
mca_ptl_elan_recv_frag_progress (mca_ptl_elan_recv_frag_t * frag)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#define _ELAN4
|
||||
#define __elan4__
|
||||
#define __elan4
|
||||
|
||||
#include "ptl_elan.h"
|
||||
#include "ptl_elan_priv.h"
|
||||
@ -15,19 +15,10 @@
|
||||
|
||||
mca_ptl_elan_state_t mca_ptl_elan_global_state;
|
||||
|
||||
static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
|
||||
static int
|
||||
ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
|
||||
{
|
||||
/* TODO:
|
||||
* a) create elan PTL instances
|
||||
* b) init transport structure for all rails
|
||||
* including STEN, QDMA and RDMA
|
||||
* c) For each possible transport mechanism, allocate:
|
||||
* send/recv descriptors;
|
||||
* system buffer;
|
||||
* event structure for transport control
|
||||
* d) initialize STAT (including SYNC) structures.
|
||||
*/
|
||||
mca_ptl_elan_module_1_0_0_t * emp;
|
||||
mca_ptl_elan_module_1_0_0_t *emp;
|
||||
int rail_count;
|
||||
|
||||
emp = ems->elan_module;
|
||||
@ -44,29 +35,30 @@ static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
|
||||
char param[256];
|
||||
mca_ptl_elan_t *ptl;
|
||||
|
||||
ptl = malloc(sizeof(mca_ptl_elan_t));
|
||||
if(NULL == ptl) {
|
||||
ptl = malloc (sizeof (mca_ptl_elan_t));
|
||||
if (NULL == ptl) {
|
||||
ompi_output (0,
|
||||
"[%s:%d] error in malloc for ptl structures \n",
|
||||
__FILE__, __LINE__);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
memcpy(ptl, &mca_ptl_elan, sizeof(mca_ptl_elan));
|
||||
memcpy (ptl, &mca_ptl_elan, sizeof (mca_ptl_elan));
|
||||
emp->elan_ptls[emp->elan_num_ptls] = ptl;
|
||||
|
||||
/* MCA related structures */
|
||||
|
||||
ptl->ptl_ni_local = emp->elan_num_ptls;
|
||||
ptl->ptl_ni_total = rail_count;
|
||||
emp->elan_num_ptls ++;
|
||||
emp->elan_num_ptls++;
|
||||
|
||||
/* allow user to specify per rail bandwidth and latency */
|
||||
sprintf(param, "bandwidth_elanrail%d", emp->elan_num_ptls);
|
||||
sprintf (param, "bandwidth_elanrail%d", emp->elan_num_ptls);
|
||||
ptl->super.ptl_bandwidth =
|
||||
mca_ptl_elan_param_register_int(param, 1000);
|
||||
sprintf(param, "latency_elanrail%d", emp->elan_num_ptls);
|
||||
ptl->super.ptl_latency = mca_ptl_elan_param_register_int(param, 1);
|
||||
mca_ptl_elan_param_register_int (param, 1000);
|
||||
sprintf (param, "latency_elanrail%d", emp->elan_num_ptls);
|
||||
ptl->super.ptl_latency =
|
||||
mca_ptl_elan_param_register_int (param, 1);
|
||||
|
||||
/* Setup elan related structures such as ctx, rail */
|
||||
ptl->ptl_elan_rail = ems->elan_rail[rail_count];
|
||||
@ -75,9 +67,7 @@ static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
|
||||
ptl->elan_nvp = ems->elan_nvp;
|
||||
} while (emp->elan_num_ptls < rail_count);
|
||||
|
||||
/* Allocating all the communication strcutures for PTL's,
|
||||
* XXX: Leave it later after finalization is done
|
||||
*/
|
||||
/* Allocating all the communication strcutures for PTL's, */
|
||||
if (OMPI_SUCCESS != ompi_init_elan_qdma (emp, rail_count)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -89,10 +79,9 @@ static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (OMPI_SUCCESS != ompi_init_elan_sten (emp, rail_count)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: initialize STAT (including SYNC) structures.
|
||||
*/
|
||||
if (OMPI_SUCCESS != ompi_init_elan_stat (emp, rail_count)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -100,17 +89,7 @@ static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
|
||||
return (OMPI_SUCCESS);
|
||||
}
|
||||
|
||||
/* Attach to the network:
|
||||
* a) First add myself into the capability
|
||||
* b) For each rail,
|
||||
* Block the inputter for any incoming traffic
|
||||
* attach to the device
|
||||
* Find out the location
|
||||
* Fill out the vp in main and elan structures.
|
||||
* create a cookiePool for threading control
|
||||
* c) Allocate a cookiePool for export Oth Rail
|
||||
* d) Find out total vpids, localVpids, localId and number of locals
|
||||
*/
|
||||
/* Attach to the network */
|
||||
static int
|
||||
ompi_elan_attach_network (mca_ptl_elan_state_t * ems)
|
||||
{
|
||||
@ -237,61 +216,66 @@ ompi_elan_attach_network (mca_ptl_elan_state_t * ems)
|
||||
}
|
||||
|
||||
static void
|
||||
ompi_module_elan_close_ptls (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
ompi_module_elan_close_ptls (mca_ptl_elan_module_1_0_0_t * emp,
|
||||
int num_rails)
|
||||
{
|
||||
|
||||
/* TODO: find the ones that are still there and free them */
|
||||
}
|
||||
|
||||
static void
|
||||
ompi_module_elan_close_procs (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
|
||||
ompi_module_elan_close_procs (mca_ptl_elan_module_1_0_0_t * emp,
|
||||
int num_rails)
|
||||
{
|
||||
|
||||
/* TODO: find the ones that are still there and free them */
|
||||
}
|
||||
|
||||
static void ompi_init_elan_queue_events(ompi_ptl_elan_queue_ctrl_t *queue)
|
||||
static void
|
||||
ompi_init_elan_queue_events (ompi_ptl_elan_queue_ctrl_t * queue)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
ELAN_SLEEP *
|
||||
ompi_init_elan_sleepdesc(mca_ptl_elan_state_t * ems, RAIL *rail)
|
||||
ompi_init_elan_sleepdesc (mca_ptl_elan_state_t * ems,
|
||||
RAIL * rail)
|
||||
{
|
||||
ELAN_SLEEP *es;
|
||||
|
||||
/* XXX: asking the caller to hold the lock */
|
||||
|
||||
es = MALLOC(sizeof(ELAN_SLEEP));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(es, NULL, NULL, 0);
|
||||
memset(es, 0, sizeof(ELAN_SLEEP));
|
||||
es = MALLOC (sizeof (ELAN_SLEEP));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (es, NULL, NULL, 0);
|
||||
memset (es, 0, sizeof (ELAN_SLEEP));
|
||||
|
||||
/* Assign next interrupt cookie value */
|
||||
es->es_cookie = ems->intcookie++;
|
||||
|
||||
/* XXX, rail[0] is choosen instead this rail */
|
||||
if (elan4_alloc_intcookie(ems->elan_rail[0]->rail_ctx,
|
||||
if (elan4_alloc_intcookie (ems->elan_rail[0]->rail_ctx,
|
||||
es->es_cookie) < 0) {
|
||||
ompi_output(0,
|
||||
ompi_output (0,
|
||||
"[%s:%d] Failed to allocate IRQ cookie \n",
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
|
||||
es->es_cmdBlk = ALLOC_ELAN(rail, E4_EVENTBLOCK_SIZE,
|
||||
es->es_cmdBlk = ALLOC_ELAN (rail, E4_EVENTBLOCK_SIZE,
|
||||
E4_EVENTBLOCK_SIZE);
|
||||
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(es->es_cmdBlk, 0, NULL, 0);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (es->es_cmdBlk, 0, NULL, 0);
|
||||
|
||||
/*Allocate a pair of command queues for blocking waits with*/
|
||||
es->es_cmdq = elan4_alloc_cmdq(rail->r_ctx, rail->r_alloc,
|
||||
CQ_Size1K, CQ_WriteEnableBit | CQ_WaitEventEnableBit,
|
||||
NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(es->es_cmdq, NULL, NULL, 0);
|
||||
/*Allocate a pair of command queues for blocking waits with */
|
||||
es->es_cmdq = elan4_alloc_cmdq (rail->r_ctx, rail->r_alloc,
|
||||
CQ_Size1K,
|
||||
CQ_WriteEnableBit |
|
||||
CQ_WaitEventEnableBit, NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (es->es_cmdq, NULL, NULL, 0);
|
||||
|
||||
/* This command queue used to fire the IRQ via
|
||||
a cmd port copy event */
|
||||
es->es_ecmdq = elan4_alloc_cmdq(rail->r_ctx,
|
||||
rail->r_alloc, CQ_Size1K, /* CQ_EnableAllBits, */
|
||||
CQ_WriteEnableBit | CQ_InterruptEnableBit, NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(es->es_ecmdq, NULL, NULL, 0);
|
||||
es->es_ecmdq = elan4_alloc_cmdq (rail->r_ctx, rail->r_alloc, CQ_Size1K, /* CQ_EnableAllBits, */
|
||||
CQ_WriteEnableBit |
|
||||
CQ_InterruptEnableBit, NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (es->es_ecmdq, NULL, NULL, 0);
|
||||
es->es_next = NULL;
|
||||
|
||||
/* XXX: asking the caller to release the lock */
|
||||
@ -299,7 +283,6 @@ ompi_init_elan_sleepdesc(mca_ptl_elan_state_t * ems, RAIL *rail)
|
||||
return es;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
{
|
||||
@ -351,7 +334,7 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
|
||||
/* Allocate elan capability from the heap */
|
||||
ems->elan_cap = (ELAN_CAPABILITY *) malloc (sizeof (ELAN_CAPABILITY));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(ems->elan_cap, NULL, OMPI_ERROR, 0);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (ems->elan_cap, NULL, OMPI_ERROR, 0);
|
||||
memset (ems->elan_cap, 0, sizeof (ELAN_CAPABILITY));
|
||||
|
||||
/* Process the capability info supplied by RMS */
|
||||
@ -373,21 +356,21 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
}
|
||||
|
||||
ems->all_rails = (RAIL **) malloc (sizeof (RAIL *) * num_rails);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(ems->all_rails, NULL,
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (ems->all_rails, NULL,
|
||||
OMPI_ERR_OUT_OF_RESOURCE, 0);
|
||||
|
||||
ems->all_estates = (ADDR_SDRAM *)
|
||||
malloc (sizeof (ELAN_ESTATE *) * num_rails);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(ems->all_estates, NULL,
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (ems->all_estates, NULL,
|
||||
OMPI_ERR_OUT_OF_RESOURCE, 0);
|
||||
|
||||
rails = (int *) malloc (sizeof (int) * num_rails);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rails, NULL, OMPI_ERR_OUT_OF_RESOURCE, 0);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rails, NULL, OMPI_ERR_OUT_OF_RESOURCE, 0);
|
||||
(void) elan_rails (ems->elan_cap, rails);
|
||||
|
||||
ems->elan_rail = (ELAN_RAIL **) malloc (sizeof (ELAN_RAIL **)
|
||||
* (num_rails + 1));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(ems->elan_rail, NULL,
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (ems->elan_rail, NULL,
|
||||
OMPI_ERR_OUT_OF_RESOURCE, 0);
|
||||
ems->elan_rail[num_rails] = NULL;
|
||||
|
||||
@ -398,11 +381,11 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
|
||||
/* Magic quadrics number for the starting cookie value */
|
||||
ems->intcookie = 42;
|
||||
ems->rail_intcookie = (int*) malloc (sizeof (int)*(num_rails + 1));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(ems->rail_intcookie, NULL,
|
||||
ems->rail_intcookie = (int *) malloc (sizeof (int) * (num_rails + 1));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (ems->rail_intcookie, NULL,
|
||||
OMPI_ERR_OUT_OF_RESOURCE, 0);
|
||||
memset (ems->elan_cap, 0, (num_rails + 1) * sizeof (int));
|
||||
ems->rail_intcookie[num_rails] = NULL;
|
||||
memset ((void*)ems->elan_cap, 0, (num_rails + 1) * sizeof (int));
|
||||
ems->rail_intcookie[num_rails] = 0;
|
||||
|
||||
for (i = 0; i < num_rails; i++) {
|
||||
|
||||
@ -413,18 +396,19 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
|
||||
/* Allocate the Main memory control structure for this rail */
|
||||
rail = ems->all_rails[i] = (RAIL *) malloc (sizeof (RAIL));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rail, NULL, OMPI_ERROR, 0);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rail, NULL, OMPI_ERROR, 0);
|
||||
memset (rail, 0, sizeof (RAIL));
|
||||
|
||||
rail->r_ctx = elan4_init (rails[i]);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rail->r_ctx, NULL, OMPI_ERROR, 0);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rail->r_ctx, NULL, OMPI_ERROR, 0);
|
||||
|
||||
rail->r_sdram = elan4_open_sdram (rails[i], 0, alloc_elansize);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rail->r_sdram, NULL, OMPI_ERROR, 0);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rail->r_sdram, NULL, OMPI_ERROR, 0);
|
||||
|
||||
rail->r_alloc = elan4_createAllocator (ems->main_size,
|
||||
rail->r_sdram, 0, ems->elan_size);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rail->r_alloc, NULL, OMPI_ERROR, 0);
|
||||
rail->r_sdram, 0,
|
||||
ems->elan_size);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rail->r_alloc, NULL, OMPI_ERROR, 0);
|
||||
|
||||
if (elan4_set_standard_mappings (rail->r_ctx) < 0
|
||||
|| elan4_set_required_mappings (rail->r_ctx) < 0) {
|
||||
@ -434,18 +418,12 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
#if 0 /* Is this only needed for TPORT support? */
|
||||
if (elan4_register_trap_handler(rail->r_ctx, UTS_UNIMP_INSTR,
|
||||
UTS_TPROC, elan_unimp_handler, NULL) < 0) {
|
||||
ompi_output(0, "elan_init(%d): Failed elan4_register_unimp()", i);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Now allocate the SDRAM Elan control structure for this rail */
|
||||
estate = ems->all_estates[i] = elan4_allocElan (rail->r_alloc,
|
||||
ELAN_ALIGN, sizeof (ELAN_EPRIVSTATE));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(estate, NULL, OMPI_ERROR, 0);
|
||||
ELAN_ALIGN,
|
||||
sizeof
|
||||
(ELAN_EPRIVSTATE));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (estate, NULL, OMPI_ERROR, 0);
|
||||
|
||||
priv_estate = (ELAN_EPRIVSTATE *) estate;
|
||||
memset (priv_estate, 0, sizeof (ELAN_EPRIVSTATE));
|
||||
@ -454,26 +432,28 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
rail->r_cmdq = elan4_alloc_cmdq (rail->r_ctx,
|
||||
rail->r_alloc,
|
||||
CQ_Size8K,
|
||||
CQ_ModifyEnableBit | CQ_WriteEnableBit | CQ_WaitEventEnableBit
|
||||
| CQ_SetEventEnableBit | CQ_ThreadStartEnableBit,
|
||||
NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rail->r_cmdq, NULL, OMPI_ERROR, 0);
|
||||
CQ_ModifyEnableBit |
|
||||
CQ_WriteEnableBit |
|
||||
CQ_WaitEventEnableBit |
|
||||
CQ_SetEventEnableBit |
|
||||
CQ_ThreadStartEnableBit, NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rail->r_cmdq, NULL, OMPI_ERROR, 0);
|
||||
|
||||
/* Allocate a command port for thread rescheduling etc */
|
||||
rail->r_ecmdq = elan4_alloc_cmdq (rail->r_ctx, rail->r_alloc,
|
||||
CQ_Size8K, CQ_EnableAllBits, NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rail->r_ecmdq, NULL, OMPI_ERROR, 0);
|
||||
CQ_Size8K, CQ_EnableAllBits,
|
||||
NULL);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rail->r_ecmdq, NULL, OMPI_ERROR, 0);
|
||||
|
||||
priv_estate->cport = MAIN2ELAN (rail->r_ctx,
|
||||
rail->r_ecmdq->cmdq_mapping);
|
||||
|
||||
/* save the rail pointers */
|
||||
/* Save the rail pointers */
|
||||
ems->elan_rail[i] = (ELAN_RAIL *) rail;
|
||||
ems->rail_intcookie[i] = ems->intcookie;
|
||||
|
||||
/* Allocate a Sleep Desc */
|
||||
|
||||
es = ompi_init_elan_sleepdesc(ems, rail);
|
||||
es = ompi_init_elan_sleepdesc (ems, rail);
|
||||
|
||||
/* XXX: put a lock and hold a lock */
|
||||
es->es_next = rail->r_sleepDescs;
|
||||
@ -492,7 +472,7 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
/*ompi_elan_railtable_t *rt; */
|
||||
struct railtable *rt;
|
||||
rt = (struct railtable *) malloc (sizeof (struct railtable));
|
||||
OMPI_PTL_ELAN_CHECK_UNEX(rt, NULL, OMPI_ERROR, 0);
|
||||
OMPI_PTL_ELAN_CHECK_UNEX (rt, NULL, OMPI_ERROR, 0);
|
||||
memset (rt, 0, sizeof (struct railtable));
|
||||
|
||||
rt->rt_nrails = 1;
|
||||
@ -549,9 +529,6 @@ ompi_mca_ptl_elan_finalize (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
ompi_module_elan_close_ptls (emp, num_rails);
|
||||
ompi_module_elan_close_procs (emp, num_rails);
|
||||
|
||||
/* Cleanup the global state
|
||||
* Free per rail structures, then the references to them */
|
||||
|
||||
for (i = 0; i < num_rails; i++) {
|
||||
RAIL *rail;
|
||||
|
||||
@ -560,12 +537,9 @@ ompi_mca_ptl_elan_finalize (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
free (rail->r_railTable);
|
||||
|
||||
/* Free the memory from the rail allocator */
|
||||
elan4_freeMain(rail->r_alloc, rail->r_ecmdq);
|
||||
elan4_freeMain(rail->r_alloc, rail->r_cmdq);
|
||||
elan4_freeElan(rail->r_alloc, ems->all_estates[i]);
|
||||
|
||||
/* Free cookie value, Destroy allocator and SDRAM handler and
|
||||
then close device */
|
||||
elan4_freeMain (rail->r_alloc, rail->r_ecmdq);
|
||||
elan4_freeMain (rail->r_alloc, rail->r_cmdq);
|
||||
elan4_freeElan (rail->r_alloc, ems->all_estates[i]);
|
||||
|
||||
/* Since the cookie allocated from rail[0], be consistent here */
|
||||
elan4_free_intcookie (ems->all_rails[0]->r_ctx,
|
||||
@ -576,16 +550,15 @@ ompi_mca_ptl_elan_finalize (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
|
||||
/*elan4_fini (rail->r_ctx); Not working yet */
|
||||
|
||||
/* Free the rail structure, why two pointers are used to
|
||||
* point to the same RAIL, all_rails and elan_rails */
|
||||
/*free (ems->elan_rail[i]);*/
|
||||
/* Free the rail structure used one the array of pointers
|
||||
* to the RAILs, either all_rails for elan_rails */
|
||||
free (ems->all_rails[i]);
|
||||
}
|
||||
|
||||
free(ems->elan_rail);
|
||||
free(ems->all_estates);
|
||||
free(ems->all_rails);
|
||||
free(ems->elan_cap);
|
||||
free (ems->elan_rail);
|
||||
free (ems->all_estates);
|
||||
free (ems->all_rails);
|
||||
free (ems->elan_cap);
|
||||
|
||||
return (OMPI_SUCCESS);
|
||||
}
|
||||
|
@ -94,19 +94,21 @@ mca_ptl_elan_module_open (void)
|
||||
mca_ptl_elan.super.ptl_exclusivity =
|
||||
mca_ptl_elan_param_register_int ("exclusivity", 0);
|
||||
mca_ptl_elan.super.ptl_first_frag_size =
|
||||
mca_ptl_elan_param_register_int ("first_frag_size", 2048/*magic*/);
|
||||
mca_ptl_elan_param_register_int ("first_frag_size",
|
||||
(2048 - sizeof(mca_ptl_base_header_t))/*magic*/);
|
||||
mca_ptl_elan.super.ptl_min_frag_size =
|
||||
mca_ptl_elan_param_register_int ("min_frag_size", 320);
|
||||
mca_ptl_elan_param_register_int ("min_frag_size",
|
||||
(2048 - sizeof(mca_ptl_base_header_t))/*magic*/);
|
||||
mca_ptl_elan.super.ptl_max_frag_size =
|
||||
mca_ptl_elan_param_register_int ("max_frag_size", -1);
|
||||
mca_ptl_elan_param_register_int ("max_frag_size", 2<<30);
|
||||
|
||||
/* register ELAN module parameters */
|
||||
elan_mp->elan_free_list_num =
|
||||
mca_ptl_elan_param_register_int ("free_list_num", 64);
|
||||
mca_ptl_elan_param_register_int ("free_list_num", 32);
|
||||
elan_mp->elan_free_list_max =
|
||||
mca_ptl_elan_param_register_int ("free_list_max", -1);
|
||||
mca_ptl_elan_param_register_int ("free_list_max", 1024);
|
||||
elan_mp->elan_free_list_inc =
|
||||
mca_ptl_elan_param_register_int ("free_list_inc", 64);
|
||||
mca_ptl_elan_param_register_int ("free_list_inc", 32);
|
||||
|
||||
/* initialize state */
|
||||
elan_mp->elan_ptls = NULL;
|
||||
@ -115,14 +117,13 @@ mca_ptl_elan_module_open (void)
|
||||
|
||||
/* initialize list */
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_reqs, ompi_list_t);
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_prog_events, ompi_list_t);
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_comp_events, ompi_list_t);
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_procs, ompi_list_t);
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_pending_acks, ompi_list_t);
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_recv_frags, ompi_list_t);
|
||||
|
||||
/* initialize free list */
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_events_free, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_reqs, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_reqs_free, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_recv_frags_free, ompi_free_list_t);
|
||||
|
||||
/* initialize other objects */
|
||||
OBJ_CONSTRUCT (&elan_mp->elan_lock, ompi_mutex_t);
|
||||
@ -161,24 +162,27 @@ mca_ptl_elan_module_close (void)
|
||||
elan_mp->elan_reqs_free.super.ompi_list_length);
|
||||
}
|
||||
|
||||
if (elan_mp->elan_events_free.fl_num_allocated !=
|
||||
elan_mp->elan_events_free.super.ompi_list_length) {
|
||||
ompi_output (0, "elan events: %d allocated %d returned\n",
|
||||
elan_mp->elan_reqs_free.fl_num_allocated,
|
||||
elan_mp->elan_reqs_free.super.ompi_list_length);
|
||||
if (elan_mp->elan_recv_frags_free.fl_num_allocated !=
|
||||
elan_mp->elan_recv_frags_free.super.ompi_list_length) {
|
||||
ompi_output (0, "elan requests: %d allocated %d returned\n",
|
||||
elan_mp->elan_recv_frags_free.fl_num_allocated,
|
||||
elan_mp->elan_recv_frags_free.super.ompi_list_length);
|
||||
}
|
||||
|
||||
/* FIXME: free free list entries before destructing lists */
|
||||
|
||||
/* Free the empty list holders */
|
||||
OBJ_DESTRUCT (&(elan_mp->elan_reqs));
|
||||
OBJ_DESTRUCT (&(elan_mp->elan_prog_events));
|
||||
OBJ_DESTRUCT (&(elan_mp->elan_comp_events));
|
||||
OBJ_DESTRUCT (&(elan_mp->elan_procs));
|
||||
OBJ_DESTRUCT (&(elan_mp->elan_pending_acks));
|
||||
OBJ_DESTRUCT (&(elan_mp->elan_recv_frags));
|
||||
|
||||
/* TODO:
|
||||
* We need free all the memory allocated for this list
|
||||
* before desctructing this free_list */
|
||||
|
||||
/* Destruct the free lists */
|
||||
OBJ_DESTRUCT (&(elan_mp->elan_events_free));
|
||||
OBJ_DESTRUCT (&(elan_mp->elan_reqs_free));
|
||||
OBJ_DESTRUCT (&(elan_mp->elan_recv_frags_free));
|
||||
|
||||
/* Destruct other structures */
|
||||
OBJ_DESTRUCT (&elan_mp->elan_lock);
|
||||
@ -198,15 +202,14 @@ mca_ptl_elan_module_init (int *num_ptls,
|
||||
bool * have_hidden_threads)
|
||||
{
|
||||
mca_ptl_t **ptls;
|
||||
int rc;
|
||||
|
||||
*num_ptls = 0;
|
||||
|
||||
/* TODO: support multiple threads */
|
||||
|
||||
*allow_multi_user_threads = true;
|
||||
*have_hidden_threads = OMPI_HAVE_THREADS;
|
||||
|
||||
/* Leave the thread related setting to PML:PTL(TCP) to decide */
|
||||
|
||||
/* initialize free lists */
|
||||
ompi_free_list_init (&(elan_mp->elan_reqs_free),
|
||||
sizeof (mca_ptl_elan_send_request_t),
|
||||
OBJ_CLASS (mca_ptl_elan_send_request_t),
|
||||
@ -214,15 +217,13 @@ mca_ptl_elan_module_init (int *num_ptls,
|
||||
elan_mp->elan_free_list_max,
|
||||
elan_mp->elan_free_list_inc, NULL);
|
||||
|
||||
ompi_free_list_init (&elan_mp->elan_events_free,
|
||||
sizeof (mca_ptl_elan_send_frag_t),
|
||||
OBJ_CLASS (mca_ptl_elan_send_frag_t),
|
||||
ompi_free_list_init (&(elan_mp->elan_recv_frags_free),
|
||||
sizeof (mca_ptl_elan_recv_frag_t),
|
||||
OBJ_CLASS (mca_ptl_elan_recv_frag_t),
|
||||
elan_mp->elan_free_list_num,
|
||||
elan_mp->elan_free_list_max,
|
||||
elan_mp->elan_free_list_inc, NULL);
|
||||
|
||||
/* use default allocator */
|
||||
|
||||
/* open basic elan device */
|
||||
if (OMPI_SUCCESS != ompi_mca_ptl_elan_init(&mca_ptl_elan_module)) {
|
||||
ompi_output(0,
|
||||
@ -231,14 +232,11 @@ mca_ptl_elan_module_init (int *num_ptls,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* we need to publish some information for elan.
|
||||
*/
|
||||
if (OMPI_SUCCESS != mca_ptl_elan_module_register(&mca_ptl_elan_module)) {
|
||||
ompi_output(0,
|
||||
"[%s:%d] error in malloc for elan PTL references\n",
|
||||
__FILE__, __LINE__);
|
||||
return OMPI_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ptls = (mca_ptl_t **) malloc (elan_mp->elan_num_ptls *
|
||||
@ -250,7 +248,6 @@ mca_ptl_elan_module_init (int *num_ptls,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Will coherency on two replicas be a potential problem? */
|
||||
memcpy (ptls, elan_mp->elan_ptls,
|
||||
elan_mp->elan_num_ptls * sizeof (mca_ptl_elan_t *));
|
||||
*num_ptls = elan_mp->elan_num_ptls;
|
||||
@ -259,28 +256,14 @@ mca_ptl_elan_module_init (int *num_ptls,
|
||||
return ptls;
|
||||
}
|
||||
|
||||
/*
|
||||
* FIXME: to support ELAN module control
|
||||
*/
|
||||
|
||||
/* support ELAN module control */
|
||||
int
|
||||
mca_ptl_elan_module_control (int param,
|
||||
void *value,
|
||||
size_t size)
|
||||
{
|
||||
switch (param) {
|
||||
#if 0
|
||||
case MCA_PTL_ENABLE:
|
||||
if (*(int *) value) {
|
||||
/* Trying to trigger the thread progress engine,
|
||||
* Here the elan PTL does not have this capability
|
||||
* for now. So we skip this function. */
|
||||
ompi_event_add (&elan_mp->elan_recv_event, 0);
|
||||
} else {
|
||||
ompi_event_del (&elan_mp->elan_recv_event);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -288,13 +271,12 @@ mca_ptl_elan_module_control (int param,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* FIXME: to support event-based module progress.
|
||||
*/
|
||||
/* TODO: to support event-based module progress later. */
|
||||
|
||||
int
|
||||
mca_ptl_elan_module_progress (mca_ptl_tstamp_t tstamp)
|
||||
{
|
||||
mca_ptl_elan_drain_recv(elan_mp);
|
||||
mca_ptl_elan_update_send(elan_mp);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -23,26 +23,29 @@ typedef enum {
|
||||
NUM_MCA_PTL_ELAN_STAT
|
||||
} mca_ptl_elan_status_t;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* An abstraction that represents a connection to a peer process.
|
||||
* Peers are always connected unless they are in different LAN or died.
|
||||
*/
|
||||
struct mca_ptl_elan_peer_t {
|
||||
ompi_list_item_t super;
|
||||
|
||||
struct mca_ptl_elan_t* peer_ptl;
|
||||
/*struct mca_ptl_elan_remote_t *peer_ptl; */
|
||||
struct mca_ptl_elan_proc_t* peer_proc;
|
||||
struct mca_ptl_elan_addr_t* peer_addr; /**< address of peer */
|
||||
|
||||
int resending; /* A resending stage, no more new dma's */
|
||||
int num_resend; /* How many times I have retried */
|
||||
double open_time;
|
||||
double close_time;
|
||||
int peer_state;
|
||||
int num_credits;
|
||||
int max_credits;
|
||||
int resending;
|
||||
int num_resend;
|
||||
double known_alive_time;
|
||||
};
|
||||
typedef struct mca_ptl_elan_peer_t mca_ptl_elan_peer_t;
|
||||
|
||||
extern ompi_class_t mca_ptl_elan_peer_t_class;
|
||||
OBJ_CLASS_DECLARATION(mca_ptl_elan_peer_t);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -0,0 +1,331 @@
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
#include "types.h"
|
||||
#include "datatype/datatype.h"
|
||||
#include "mca/pml/base/pml_base_sendreq.h"
|
||||
#include "mca/pml/base/pml_base_recvreq.h"
|
||||
#include "ptl_elan.h"
|
||||
#include "ptl_elan_peer.h"
|
||||
#include "ptl_elan_proc.h"
|
||||
#include "ptl_elan_frag.h"
|
||||
#include "ptl_elan_req.h"
|
||||
#include "ptl_elan_priv.h"
|
||||
|
||||
static void
|
||||
mca_ptl_elan_init_qdma_desc (struct ompi_ptl_elan_qdma_desc_t *desc,
|
||||
mca_ptl_elan_t * ptl,
|
||||
mca_ptl_elan_send_request_t * req)
|
||||
{
|
||||
char *app_buff;
|
||||
int header_length;
|
||||
int mesg_length;
|
||||
int flags = 0; /* FIXME: now */
|
||||
mca_ptl_base_header_t *hdr;
|
||||
|
||||
int destvp = ptl->elan_vp;
|
||||
|
||||
/* TODO: For now, assume data are contiguous and less than eager size */
|
||||
app_buff = (char *) req->super.super.req_addr;
|
||||
|
||||
header_length = sizeof (mca_ptl_base_match_header_t);
|
||||
mesg_length = req->super.req_bytes_packed;
|
||||
|
||||
hdr = (mca_ptl_base_header_t *) & desc->buff[0];
|
||||
|
||||
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH;
|
||||
hdr->hdr_common.hdr_flags = flags;
|
||||
hdr->hdr_common.hdr_size = sizeof (mca_ptl_base_match_header_t);
|
||||
hdr->hdr_frag.hdr_frag_offset = 0;
|
||||
hdr->hdr_frag.hdr_frag_seq = 0;
|
||||
hdr->hdr_frag.hdr_src_ptr.pval = 0;
|
||||
hdr->hdr_frag.hdr_dst_ptr.lval = 0;
|
||||
|
||||
hdr->hdr_match.hdr_contextid = req->super.super.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = req->super.super.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_dst = req->super.super.req_peer;
|
||||
hdr->hdr_match.hdr_tag = req->super.super.req_tag;
|
||||
hdr->hdr_match.hdr_msg_length = mesg_length;
|
||||
hdr->hdr_match.hdr_msg_seq = req->super.super.req_sequence;
|
||||
hdr->hdr_frag.hdr_frag_length = mesg_length;
|
||||
|
||||
/* Fill up all the necessary fields */
|
||||
memcpy (&desc->buff[header_length], app_buff, mesg_length);
|
||||
desc->main_dma.dma_srcAddr = MAIN2ELAN (desc->rail->r_ctx,
|
||||
&desc->buff[0]);
|
||||
|
||||
/* XXXX Hardwired DMA retry count */
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE ((header_length +
|
||||
mesg_length),
|
||||
DMA_DataTypeByte,
|
||||
DMA_QueueWrite, 16);
|
||||
desc->main_dma.dma_cookie =
|
||||
elan4_local_cookie (ptl->queue->tx_cpool, E4_COOKIE_TYPE_LOCAL_DMA,
|
||||
destvp);
|
||||
desc->main_dma.dma_vproc = destvp;
|
||||
|
||||
ompi_output (0,
|
||||
"elan_queueTx(%p): DMA: typeSize %Lx vproc %lx srcAddr %Lx "
|
||||
"dstAddr %Lx srcEvent %Lx dstEvent %Lx\n",
|
||||
desc->rail,
|
||||
desc->main_dma.dma_typeSize,
|
||||
desc->main_dma.dma_vproc,
|
||||
desc->main_dma.dma_srcAddr,
|
||||
desc->main_dma.dma_dstAddr,
|
||||
desc->main_dma.dma_srcEvent, desc->main_dma.dma_dstEvent);
|
||||
|
||||
/* Make main memory coherent with IO domain (IA64) */
|
||||
MEMBAR_VISIBLE ();
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_ptl_elan_start_desc (int type,
|
||||
mca_ptl_elan_desc_item_t * desc)
|
||||
{
|
||||
mca_ptl_elan_t *ptl;
|
||||
mca_ptl_elan_send_request_t *req;
|
||||
|
||||
if (type == MCA_PTL_ELAN_QDMA_DESC) {
|
||||
struct ompi_ptl_elan_qdma_desc_t *qdma;
|
||||
|
||||
qdma = desc->item.qdma;
|
||||
ptl = qdma->ptl;
|
||||
req = qdma->req;
|
||||
|
||||
mca_ptl_elan_init_qdma_desc (qdma, ptl, req);
|
||||
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma);
|
||||
ptl->queue->tx_cmdq->cmdq_flush (ptl->queue->tx_cmdq);
|
||||
ompi_output (0, "elan_queueTx(%p) returning %p\n",
|
||||
ptl->queue, desc);
|
||||
|
||||
/* Insert desc into the list of outstanding DMA's */
|
||||
ompi_list_append (&ptl->queue->tx_desc, (ompi_list_item_t *) desc);
|
||||
|
||||
} else {
|
||||
ompi_output (0,
|
||||
"Other types of DMA are not supported right now \n");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
mca_ptl_elan_data_frag (struct mca_ptl_elan_t *ptl,
|
||||
mca_ptl_base_header_t * header)
|
||||
{
|
||||
/* Allocate a recv frag descriptor */
|
||||
mca_ptl_elan_recv_frag_t *recv_frag;
|
||||
ompi_list_item_t *item;
|
||||
mca_pml_base_recv_request_t *request;
|
||||
|
||||
int rc;
|
||||
|
||||
OMPI_FREE_LIST_GET (&mca_ptl_elan_module.elan_recv_frags_free,
|
||||
item, rc);
|
||||
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
ompi_output (0,
|
||||
"[%s:%d] Unable to allocate a recv fragment",
|
||||
__FILE__, __LINE__);
|
||||
return;
|
||||
/* TODO: progress the recv state machine */
|
||||
}
|
||||
|
||||
recv_frag = (mca_ptl_elan_recv_frag_t *) item;
|
||||
|
||||
recv_frag->super.super.frag_owner = (mca_ptl_t *) ptl;
|
||||
recv_frag->super.super.frag_addr = NULL;
|
||||
recv_frag->super.super.frag_size = 0;
|
||||
recv_frag->super.super.frag_peer = NULL; /* FIXME: peer; */
|
||||
recv_frag->super.frag_request = 0;
|
||||
recv_frag->super.frag_is_buffered = false;
|
||||
recv_frag->frag_hdr_cnt = 0;
|
||||
recv_frag->frag_msg_cnt = 0;
|
||||
|
||||
/* Take the header */
|
||||
recv_frag->super.super.frag_header = *header;
|
||||
|
||||
/* match with preposted requests */
|
||||
if (mca_ptl_base_recv_frag_match (recv_frag->super.super.frag_owner,
|
||||
&recv_frag->super,
|
||||
&recv_frag->super.super.frag_header.
|
||||
hdr_match)) {
|
||||
/* copy into the request buffer */
|
||||
request = recv_frag->super.frag_request;
|
||||
memcpy (request->super.req_addr,
|
||||
(char *) header + sizeof (mca_ptl_base_header_t),
|
||||
header->hdr_frag.hdr_frag_length);
|
||||
} else {
|
||||
recv_frag->super.frag_is_buffered = true;
|
||||
recv_frag->super.super.frag_addr = recv_frag->unex_buff;
|
||||
recv_frag->super.super.frag_size =
|
||||
header->hdr_frag.hdr_frag_length;
|
||||
memcpy (recv_frag->unex_buff,
|
||||
(char *) header + sizeof (mca_ptl_base_header_t),
|
||||
header->hdr_frag.hdr_frag_length);
|
||||
}
|
||||
|
||||
/* Complete the fragment */
|
||||
if (NULL != recv_frag->super.frag_request) {
|
||||
mca_ptl_base_recv_progress_fn_t progress;
|
||||
|
||||
progress = recv_frag->super.super.frag_owner->ptl_recv_progress;
|
||||
request = recv_frag->super.frag_request;
|
||||
|
||||
/* progress the request */
|
||||
progress (recv_frag->super.super.frag_owner, request,
|
||||
&recv_frag->super);
|
||||
mca_ptl_elan_recv_frag_return (recv_frag->super.super.frag_owner,
|
||||
recv_frag);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
mca_ptl_elan_ctrl_frag (struct mca_ptl_elan_t *ptl,
|
||||
mca_ptl_base_header_t * header)
|
||||
{
|
||||
/* TODO:
|
||||
* 0) First of all, no need to allocate frag descriptors,
|
||||
* since control packet does not contain data.
|
||||
* 1) Start successive fragments if it is an ACK
|
||||
* 2) Resend the original fragment if it is a NACK
|
||||
* 3) Update the request if no more fragment.
|
||||
*/
|
||||
}
|
||||
|
||||
int
|
||||
mca_ptl_elan_drain_recv (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
{
|
||||
struct ompi_ptl_elan_queue_ctrl_t *queue;
|
||||
ompi_ptl_elan_recv_queue_t *rxq;
|
||||
struct mca_ptl_elan_t *ptl;
|
||||
ELAN_CTX *ctx;
|
||||
|
||||
int num_ptls;
|
||||
int i;
|
||||
int rc;
|
||||
|
||||
num_ptls = emp->elan_num_ptls;
|
||||
|
||||
/* Iterate over all the PTL input Queues */
|
||||
for (i = 0; i < num_ptls; i++) {
|
||||
ptl = emp->elan_ptls[i];
|
||||
queue = emp->elan_ptls[i]->queue;
|
||||
rxq = queue->rxq;
|
||||
ctx = ptl->ptl_elan_ctx;
|
||||
|
||||
OMPI_LOCK (&queue->rx_lock);
|
||||
|
||||
rc = elan4_pollevent_word (ctx, &rxq->qr_doneWord, 1);
|
||||
|
||||
if (rc) {
|
||||
|
||||
mca_ptl_base_header_t *header;
|
||||
|
||||
header = (mca_ptl_base_header_t *) rxq->qr_fptr;
|
||||
|
||||
switch (header->hdr_common.hdr_type) {
|
||||
case MCA_PTL_HDR_TYPE_MATCH:
|
||||
case MCA_PTL_HDR_TYPE_FRAG:
|
||||
/* a data fragment */
|
||||
mca_ptl_elan_data_frag (ptl, header);
|
||||
break;
|
||||
case MCA_PTL_HDR_TYPE_ACK:
|
||||
case MCA_PTL_HDR_TYPE_NACK:
|
||||
/* a control fragment for a message */
|
||||
mca_ptl_elan_ctrl_frag (ptl, header);
|
||||
break;
|
||||
default:
|
||||
ompi_output (0, "[%s:%d] unknow fragment type %d\n"
|
||||
__FILE__, __LINE__,
|
||||
header->hdr_common.hdr_type);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Work out the new front pointer */
|
||||
if (rxq->qr_fptr == rxq->qr_top) {
|
||||
rxq->qr_fptr = rxq->qr_base;
|
||||
rxq->qr_efptr = rxq->qr_efitem;
|
||||
} else {
|
||||
rxq->qr_fptr = (void *) ((uintptr_t) rxq->qr_fptr
|
||||
+ queue->rx_slotsize);
|
||||
rxq->qr_efptr += queue->rx_slotsize;
|
||||
}
|
||||
|
||||
/* PCI Write */
|
||||
queue->input->q_fptr = rxq->qr_efptr;
|
||||
MEMBAR_STORESTORE ();
|
||||
|
||||
/* Reset the event */
|
||||
RESETEVENT_WORD (&rxq->qr_doneWord);
|
||||
|
||||
/* Order RESETEVENT wrt to wait_event_cmd */
|
||||
MEMBAR_STORESTORE ();
|
||||
|
||||
/* Re-prime queue event by issuing a waitevent(1) on it */
|
||||
elan4_wait_event_cmd (rxq->qr_cmdq,
|
||||
/* Is qr_elanDone really a main memory address? */
|
||||
MAIN2ELAN (ctx, &rxq->qr_elanDone),
|
||||
E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE,
|
||||
E4_EVENT_DTYPE_LONG, 0),
|
||||
MAIN2ELAN (ctx, (void *) &rxq-> qr_doneWord),
|
||||
0xfeedfacedeadbeef);
|
||||
rxq->qr_cmdq->cmdq_flush (rxq->qr_cmdq);
|
||||
|
||||
}
|
||||
OMPI_UNLOCK (&queue->rx_lock);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
mca_ptl_elan_update_send (mca_ptl_elan_module_1_0_0_t * emp)
|
||||
{
|
||||
struct mca_ptl_elan_t *ptl;
|
||||
ompi_ptl_elan_queue_ctrl_t *queue;
|
||||
mca_ptl_elan_desc_item_t *desc;
|
||||
ELAN4_CTX *ctx;
|
||||
|
||||
int num_ptls;
|
||||
int i;
|
||||
|
||||
num_ptls = emp->elan_num_ptls;
|
||||
|
||||
/* Update the send request if any of send's is completed */
|
||||
for (i = 0; i < num_ptls; i++) {
|
||||
ptl = emp->elan_ptls[i];
|
||||
queue = ptl->queue;
|
||||
ctx = ptl->ptl_elan_ctx;
|
||||
|
||||
do {
|
||||
desc = (mca_ptl_elan_desc_item_t *)
|
||||
ompi_list_get_first (&queue->tx_desc);
|
||||
#if 1
|
||||
if ((int *) (&desc->item.qdma->main_doneWord))
|
||||
#else
|
||||
/* Poll the completion event for 1usec */
|
||||
if (elan4_pollevent_word
|
||||
(ctx, desc->item.qdma->main_doneWord, 1))
|
||||
#endif
|
||||
{
|
||||
mca_ptl_elan_send_request_t *req;
|
||||
/* Remove the desc, update the request, put back to free list */
|
||||
desc = (mca_ptl_elan_desc_item_t *)
|
||||
ompi_list_remove_first (&queue->tx_desc);
|
||||
req = desc->item.qdma->req;
|
||||
req->super.super.req_mpi_done = true;
|
||||
req->super.super.req_pml_done = true;
|
||||
OMPI_FREE_LIST_RETURN (&queue->tx_desc_free,
|
||||
(ompi_list_item_t *) desc);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while (ompi_list_get_size (&queue->tx_desc) > 0);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -59,8 +59,7 @@ struct mca_ptl_elan_addr_t {
|
||||
};
|
||||
typedef struct mca_ptl_elan_addr_t mca_ptl_elan_addr_t;
|
||||
|
||||
struct ompi_ptl_elan_recv_queue_t
|
||||
{
|
||||
struct ompi_ptl_elan_recv_queue_t {
|
||||
/* Events needs to be aligned */
|
||||
EVENT_WORD qr_doneWord;
|
||||
ADDR_SDRAM qr_qEvent;
|
||||
@ -80,28 +79,32 @@ struct ompi_ptl_elan_recv_queue_t
|
||||
};
|
||||
typedef struct ompi_ptl_elan_recv_queue_t ompi_ptl_elan_recv_queue_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
typedef struct {
|
||||
/* SHOULD BE 128-byte aligned */
|
||||
uint8_t data[INPUT_QUEUE_MAX]; /* queue req data packet */
|
||||
/* SHOULD be 32-byte aligned */
|
||||
E4_Event32 event32; /* Local elan completion event */
|
||||
} ompi_elan_event_t;
|
||||
|
||||
struct ompi_ptl_elan_queue_send_t
|
||||
{
|
||||
struct ompi_ptl_elan_qdma_desc_t {
|
||||
E4_DMA64 main_dma; /**< Must be 8-byte aligned */
|
||||
/* 8 byte aligned */
|
||||
|
||||
volatile E4_uint64 main_doneWord; /**< main memory location to poll */
|
||||
ompi_elan_event_t *elan_data_event; /**< 128-byte aligned copy event */
|
||||
RAIL *rail;
|
||||
/* 8 byte aligned */
|
||||
uint8_t buff[INPUT_QUEUE_MAX]; /**< queue data */
|
||||
};
|
||||
typedef struct ompi_ptl_elan_queue_send_t ompi_ptl_elan_queue_send_t;
|
||||
|
||||
struct ompi_ptl_elan_queue_ctrl_t
|
||||
{
|
||||
mca_ptl_elan_t *ptl;
|
||||
mca_ptl_elan_send_request_t *req;
|
||||
/* 8 byte aligned */
|
||||
|
||||
uint8_t buff[INPUT_QUEUE_MAX]; /**< queue data */
|
||||
/* 8 byte aligned */
|
||||
};
|
||||
typedef struct ompi_ptl_elan_qdma_desc_t ompi_ptl_elan_qdma_desc_t;
|
||||
|
||||
struct ompi_ptl_elan_queue_ctrl_t {
|
||||
/* Transmit Queues */
|
||||
/** < elan located INPUT_QUEUE_ALIGN'ed with INPUT_QUEUE_SIZE */
|
||||
E4_InputQueue *input;
|
||||
@ -122,7 +125,7 @@ struct ompi_ptl_elan_queue_ctrl_t
|
||||
int rx_nslots;
|
||||
|
||||
/*Automatic progression */
|
||||
void (*rx_fn)(void);
|
||||
void (*rx_fn) (void);
|
||||
void *rx_handle;
|
||||
|
||||
/* Recv Queue has to be well-aligned */
|
||||
@ -181,26 +184,29 @@ struct mca_ptl_elan_state_t {
|
||||
typedef struct mca_ptl_elan_state_t mca_ptl_elan_state_t;
|
||||
|
||||
/* Util functions, consider moving into a file ptl_elan_util.h */
|
||||
ELAN_SLEEP *
|
||||
ompi_init_elan_sleepdesc(mca_ptl_elan_state_t * ems, RAIL *rail);
|
||||
ELAN_SLEEP *ompi_init_elan_sleepdesc (mca_ptl_elan_state_t * ems,
|
||||
RAIL * rail);
|
||||
|
||||
/* Initialization and finalization routines */
|
||||
int ompi_mca_ptl_elan_init( mca_ptl_elan_module_1_0_0_t * emp);
|
||||
int ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp);
|
||||
int ompi_mca_ptl_elan_finalize (mca_ptl_elan_module_1_0_0_t * emp);
|
||||
|
||||
/* communication initialization prototypes */
|
||||
int ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails);
|
||||
int ompi_init_elan_sten (mca_ptl_elan_module_1_0_0_t* emp, int num_rails);
|
||||
int ompi_init_elan_rdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails);
|
||||
int ompi_init_elan_stat (mca_ptl_elan_module_1_0_0_t* emp, int num_rails);
|
||||
int ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t * emp,
|
||||
int num_rails);
|
||||
int ompi_init_elan_rdma (mca_ptl_elan_module_1_0_0_t * emp,
|
||||
int num_rails);
|
||||
int ompi_init_elan_stat (mca_ptl_elan_module_1_0_0_t * emp,
|
||||
int num_rails);
|
||||
|
||||
/* communication prototypes */
|
||||
int mca_ptl_elan_start_desc(int type, mca_ptl_elan_desc_item_t *desc);
|
||||
int mca_ptl_elan_poll_desc(int type, mca_ptl_elan_desc_item_t *desc);
|
||||
int mca_ptl_elan_wait_desc(int type, mca_ptl_elan_desc_item_t *desc);
|
||||
|
||||
/* control, synchronization and state prototypes */
|
||||
|
||||
/* Just to get rid of a warning from elan4 libraies,
|
||||
* Many more needed but who cares. */
|
||||
int elan4_block_inputter (ELAN4_CTX *ctx, unsigned blocked);
|
||||
int mca_ptl_elan_drain_recv(mca_ptl_elan_module_1_0_0_t *emp);
|
||||
int mca_ptl_elan_update_send(mca_ptl_elan_module_1_0_0_t *emp);
|
||||
|
||||
/**
|
||||
* utility routines for parameter registration
|
||||
|
@ -27,7 +27,8 @@ ompi_class_t mca_ptl_elan_proc_t_class = {
|
||||
* Initialize elan proc instance
|
||||
*/
|
||||
|
||||
void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc)
|
||||
void
|
||||
mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc)
|
||||
{
|
||||
proc->proc_ompi = NULL;
|
||||
proc->proc_addrs = NULL;
|
||||
@ -38,12 +39,12 @@ void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc)
|
||||
proc->proc_guid.jobid = 0;
|
||||
proc->proc_guid.procid = 0;
|
||||
|
||||
OBJ_CONSTRUCT(&proc->proc_lock, ompi_mutex_t);
|
||||
OBJ_CONSTRUCT (&proc->proc_lock, ompi_mutex_t);
|
||||
|
||||
/* add to list of all proc instance */
|
||||
OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock);
|
||||
ompi_list_append(&mca_ptl_elan_module.elan_procs, &proc->super);
|
||||
OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);
|
||||
OMPI_THREAD_LOCK (&mca_ptl_elan_module.elan_lock);
|
||||
ompi_list_append (&mca_ptl_elan_module.elan_procs, &proc->super);
|
||||
OMPI_THREAD_UNLOCK (&mca_ptl_elan_module.elan_lock);
|
||||
|
||||
return;
|
||||
}
|
||||
@ -53,16 +54,17 @@ void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc)
|
||||
* Cleanup elan proc instance
|
||||
*/
|
||||
|
||||
void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc)
|
||||
void
|
||||
mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc)
|
||||
{
|
||||
/* remove from list of all proc instances */
|
||||
OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock);
|
||||
ompi_list_remove_item(&mca_ptl_elan_module.elan_procs, &proc->super);
|
||||
OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);
|
||||
OMPI_THREAD_LOCK (&mca_ptl_elan_module.elan_lock);
|
||||
ompi_list_remove_item (&mca_ptl_elan_module.elan_procs, &proc->super);
|
||||
OMPI_THREAD_UNLOCK (&mca_ptl_elan_module.elan_lock);
|
||||
|
||||
/* release resources */
|
||||
if(NULL != proc->proc_peers)
|
||||
free(proc->proc_peers);
|
||||
if (NULL != proc->proc_peers)
|
||||
free (proc->proc_peers);
|
||||
|
||||
return;
|
||||
}
|
||||
@ -75,47 +77,55 @@ void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc)
|
||||
* addresses) associated w/ a given destination on this datastructure.
|
||||
*/
|
||||
|
||||
mca_ptl_elan_proc_t *mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc)
|
||||
mca_ptl_elan_proc_t *
|
||||
mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc)
|
||||
{
|
||||
int rc;
|
||||
size_t size;
|
||||
mca_ptl_elan_proc_t* ptl_proc;
|
||||
mca_ptl_elan_proc_t *ptl_proc;
|
||||
|
||||
ptl_proc = mca_ptl_elan_proc_lookup_ompi(ompi_proc);
|
||||
if(ptl_proc != NULL)
|
||||
ptl_proc = mca_ptl_elan_proc_lookup_ompi (ompi_proc);
|
||||
if (ptl_proc != NULL)
|
||||
return ptl_proc;
|
||||
|
||||
ptl_proc = OBJ_NEW(mca_ptl_elan_proc_t);
|
||||
ptl_proc = OBJ_NEW (mca_ptl_elan_proc_t);
|
||||
ptl_proc->proc_ompi = ompi_proc;
|
||||
ptl_proc->proc_guid = ompi_proc->proc_name;
|
||||
|
||||
rc = mca_base_modex_recv( &mca_ptl_elan_module.super.ptlm_version,
|
||||
ompi_proc, (void**)&ptl_proc->proc_addrs, &size);
|
||||
/* Extract exposed addresses from remote proc */
|
||||
rc = mca_base_modex_recv (&mca_ptl_elan_module.super.ptlm_version,
|
||||
ompi_proc, (void **) &ptl_proc->proc_addrs,
|
||||
&size);
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
ompi_output(0, "[%s:%d] mca_base_modex_recv failed to recv data \n",
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
ompi_output (0,
|
||||
"[%s:%d] mca_base_modex_recv failed to recv data \n",
|
||||
__FILE__, __LINE__);
|
||||
OBJ_RELEASE(ptl_proc);
|
||||
OBJ_RELEASE (ptl_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(0 != (size % sizeof(mca_ptl_elan_addr_t))) {
|
||||
ompi_output(0, "[%s:%d] invalid received data size %d\n", size);
|
||||
if (0 != (size % sizeof (mca_ptl_elan_addr_t))) {
|
||||
ompi_output (0, "[%s:%d] invalid received data size %d\n",
|
||||
__FILE__, __LINE__, size);
|
||||
return NULL;
|
||||
}
|
||||
ptl_proc->proc_addr_count = size / sizeof(mca_ptl_elan_addr_t);
|
||||
ptl_proc->proc_addr_count = size / sizeof (mca_ptl_elan_addr_t);
|
||||
|
||||
/* allocate space for peer array - one for each exported address */
|
||||
ptl_proc->proc_peers = (mca_ptl_elan_peer_t**)
|
||||
malloc(ptl_proc->proc_addr_count * sizeof(mca_ptl_elan_peer_t*));
|
||||
ptl_proc->proc_peers = (mca_ptl_elan_peer_t **)
|
||||
malloc (ptl_proc->proc_addr_count *
|
||||
sizeof (mca_ptl_elan_peer_t *));
|
||||
|
||||
if(NULL == ptl_proc->proc_peers) {
|
||||
OBJ_RELEASE(ptl_proc);
|
||||
if (NULL == ptl_proc->proc_peers) {
|
||||
OBJ_RELEASE (ptl_proc);
|
||||
ompi_output (0, "[%s:%d] unable to allocate peer procs \n"
|
||||
__FILE__, __LINE__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(NULL == mca_ptl_elan_module.elan_local
|
||||
&& ompi_proc == ompi_proc_local()) {
|
||||
if (NULL == mca_ptl_elan_module.elan_local
|
||||
&& ompi_proc == ompi_proc_local ()) {
|
||||
mca_ptl_elan_module.elan_local = ptl_proc;
|
||||
}
|
||||
return ptl_proc;
|
||||
@ -126,24 +136,25 @@ mca_ptl_elan_proc_t *mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc)
|
||||
* ompi_proc_t instance.
|
||||
*/
|
||||
static mca_ptl_elan_proc_t *
|
||||
mca_ptl_elan_proc_lookup_ompi (ompi_proc_t *ompi_proc)
|
||||
mca_ptl_elan_proc_lookup_ompi (ompi_proc_t * ompi_proc)
|
||||
{
|
||||
mca_ptl_elan_proc_t* elan_proc;
|
||||
mca_ptl_elan_proc_t *elan_proc;
|
||||
|
||||
OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock);
|
||||
OMPI_THREAD_LOCK (&mca_ptl_elan_module.elan_lock);
|
||||
|
||||
elan_proc = (mca_ptl_elan_proc_t*)
|
||||
ompi_list_get_first(&mca_ptl_elan_module.elan_procs);
|
||||
elan_proc = (mca_ptl_elan_proc_t *)
|
||||
ompi_list_get_first (&mca_ptl_elan_module.elan_procs);
|
||||
|
||||
for( ; elan_proc != (mca_ptl_elan_proc_t*)
|
||||
ompi_list_get_end(&mca_ptl_elan_module.elan_procs);
|
||||
elan_proc = (mca_ptl_elan_proc_t*)ompi_list_get_next(elan_proc)) {
|
||||
if(elan_proc->proc_ompi == ompi_proc) {
|
||||
OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);
|
||||
for (; elan_proc != (mca_ptl_elan_proc_t *)
|
||||
ompi_list_get_end (&mca_ptl_elan_module.elan_procs);
|
||||
elan_proc =
|
||||
(mca_ptl_elan_proc_t *) ompi_list_get_next (elan_proc)) {
|
||||
if (elan_proc->proc_ompi == ompi_proc) {
|
||||
OMPI_THREAD_UNLOCK (&mca_ptl_elan_module.elan_lock);
|
||||
return elan_proc;
|
||||
}
|
||||
}
|
||||
OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);
|
||||
OMPI_THREAD_UNLOCK (&mca_ptl_elan_module.elan_lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@ -153,7 +164,9 @@ mca_ptl_elan_proc_lookup_ompi (ompi_proc_t *ompi_proc)
|
||||
* Look for an existing ELAN process instance based on the globally unique
|
||||
* process identifier.
|
||||
*/
|
||||
mca_ptl_elan_proc_t *mca_ptl_elan_proc_lookup (void *guid, size_t size)
|
||||
mca_ptl_elan_proc_t *
|
||||
mca_ptl_elan_proc_lookup (void *guid,
|
||||
size_t size)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
@ -163,7 +176,8 @@ mca_ptl_elan_proc_t *mca_ptl_elan_proc_lookup (void *guid, size_t size)
|
||||
* Note that this routine must be called with the lock on the process already
|
||||
* held. Insert a ptl instance into the proc array and assign it an address.
|
||||
*/
|
||||
int mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc,
|
||||
int
|
||||
mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc,
|
||||
mca_ptl_elan_peer_t * ptl_peer)
|
||||
{
|
||||
int i;
|
||||
@ -177,24 +191,24 @@ int mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc,
|
||||
* directly attached network. If we don't find one, pick the first
|
||||
* unused address. */
|
||||
|
||||
for(i=0; i<ptl_proc->proc_addr_count; i++) {
|
||||
for (i = 0; i < ptl_proc->proc_addr_count; i++) {
|
||||
|
||||
unsigned vp_local;
|
||||
unsigned vp_remote;
|
||||
mca_ptl_elan_addr_t* peer_addr;
|
||||
mca_ptl_elan_addr_t *peer_addr;
|
||||
|
||||
peer_addr = ptl_proc->proc_addrs + i;
|
||||
if(peer_addr->addr_inuse != 0) {
|
||||
if (peer_addr->addr_inuse != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
vp_local = ptl_elan->elan_vp;
|
||||
vp_remote = peer_addr->elan_vp;
|
||||
|
||||
if(vp_local = vp_remote) {
|
||||
if (vp_local = vp_remote) {
|
||||
ptl_peer->peer_addr = peer_addr;
|
||||
break;
|
||||
} else if(ptl_peer->peer_addr != 0) {
|
||||
} else if (ptl_peer->peer_addr != 0) {
|
||||
ptl_peer->peer_addr = peer_addr;
|
||||
}
|
||||
}
|
||||
@ -209,7 +223,8 @@ int mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc,
|
||||
* no longer in use.
|
||||
*/
|
||||
|
||||
int mca_ptl_elan_proc_remove (mca_ptl_elan_proc_t * ptl_proc,
|
||||
int
|
||||
mca_ptl_elan_proc_remove (mca_ptl_elan_proc_t * ptl_proc,
|
||||
mca_ptl_elan_peer_t * ptl_peer)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
@ -220,8 +235,10 @@ int mca_ptl_elan_proc_remove (mca_ptl_elan_proc_t * ptl_proc,
|
||||
* loop through all available PTLs for one matching the source address
|
||||
* of the request.
|
||||
*/
|
||||
bool mca_ptl_elan_proc_accept (mca_ptl_elan_proc_t * ptl_proc,
|
||||
struct sockaddr_in * addr, int sd)
|
||||
bool
|
||||
mca_ptl_elan_proc_accept (mca_ptl_elan_proc_t * ptl_proc,
|
||||
struct sockaddr_in * addr,
|
||||
int sd)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -9,10 +9,22 @@
|
||||
#include "ptl_elan_req.h"
|
||||
#include "ptl_elan.h"
|
||||
|
||||
static void
|
||||
mca_ptl_elan_send_request_construct (mca_ptl_elan_send_request_t *);
|
||||
static void
|
||||
mca_ptl_elan_send_request_destruct (mca_ptl_elan_send_request_t *);
|
||||
void
|
||||
mca_ptl_elan_send_request_construct (mca_ptl_elan_send_request_t * request)
|
||||
{
|
||||
OBJ_CONSTRUCT (&request->super, mca_pml_base_send_request_t);
|
||||
request->desc_type = 0;
|
||||
request->req_frag = NULL;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
mca_ptl_elan_send_request_destruct (mca_ptl_elan_send_request_t * request)
|
||||
{
|
||||
OBJ_DESTRUCT (&request->super);
|
||||
request->desc_type = 0;
|
||||
request->req_frag = NULL;
|
||||
}
|
||||
|
||||
ompi_class_t mca_ptl_elan_send_request_t_class = {
|
||||
"mca_ptl_elan_send_request_t",
|
||||
@ -20,17 +32,3 @@ ompi_class_t mca_ptl_elan_send_request_t_class = {
|
||||
(ompi_construct_t) mca_ptl_elan_send_request_construct,
|
||||
(ompi_destruct_t) mca_ptl_elan_send_request_destruct
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
mca_ptl_elan_send_request_construct (mca_ptl_elan_send_request_t * request)
|
||||
{
|
||||
OBJ_CONSTRUCT (&request->req_frag, mca_ptl_elan_send_frag_t);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
mca_ptl_elan_send_request_destruct (mca_ptl_elan_send_request_t * request)
|
||||
{
|
||||
OBJ_DESTRUCT (&request->req_frag);
|
||||
}
|
||||
|
@ -25,20 +25,23 @@
|
||||
#include "ptl_elan.h"
|
||||
#include "ptl_elan_frag.h"
|
||||
|
||||
enum {
|
||||
MCA_PTL_ELAN_NULL_DESC,
|
||||
MCA_PTL_ELAN_QDMA_DESC,
|
||||
MCA_PTL_ELAN_PUTGET_DESC
|
||||
};
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_ptl_elan_send_request_t);
|
||||
OBJ_CLASS_DECLARATION(mca_ptl_elan_recv_request_t);
|
||||
/*extern ompi_class_t mca_ptl_elan_send_request_t_class;*/
|
||||
/*extern ompi_class_t mca_ptl_elan_recv_request_t_class;*/
|
||||
|
||||
/**
|
||||
* ELAN send request derived type. The send request contains both the
|
||||
* base send request, and space for the first ELAN send fragment descriptor.
|
||||
* This avoids the overhead of a second allocation for the initial send
|
||||
* fragment on every send request.
|
||||
* ELAN send request derived type. The send request contains
|
||||
* the base send request and a point to the elan fragment descriptor
|
||||
*/
|
||||
struct mca_ptl_elan_send_request_t {
|
||||
mca_pml_base_send_request_t super;
|
||||
mca_ptl_elan_send_frag_t req_frag; /* first fragment */
|
||||
int desc_type;
|
||||
mca_ptl_elan_desc_item_t *req_frag;
|
||||
};
|
||||
typedef struct mca_ptl_elan_send_request_t mca_ptl_elan_send_request_t;
|
||||
#endif
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user