1
1

a) Initialize Queue DMA control structures

b) Initialize the Queue DMA based descriptors
c) Reorganize the initialization code.

Todo:

a) Frag and request descriptors and get put/get functions going
b) Add RDMA support for Elan4; preferably backed by globally addressable memory

This commit was SVN r1535.
Этот коммит содержится в:
Weikuan Yu 2004-07-01 20:27:20 +00:00
родитель b797c86672
Коммит d6ebe382e4
7 изменённых файлов: 698 добавлений и 400 удалений

Просмотреть файл

@ -26,6 +26,7 @@ libmca_ptl_elan_la_SOURCES = \
ptl_elan_module.c \
ptl_elan_priv.c \
ptl_elan_init.c \
ptl_elan_comm_init.c \
ptl_elan.c

Просмотреть файл

@ -15,13 +15,11 @@
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#define MCA_PTL_ELAN_STATISTICS 0
#include "elan.h"
#include "init.h"
struct mca_ptl_elan_state_t;
struct ompi_ptl_elan_queue_ctrl_t;
extern struct mca_ptl_elan_state_t mca_ptl_elan_global_state;
/**
@ -39,6 +37,12 @@ struct mca_ptl_elan_t {
int ptl_ni_local; /**< PTL NI local rank */
int ptl_ni_total; /**< PTL NI total */
/* common elan structures, each ptl keeps a copy */
unsigned int elan_vp; /**< elan vpid, not ompi vpid */
unsigned int elan_nvp; /**< total # of elan vpid */
struct ompi_ptl_elan_queue_ctrl_t *queue; /**< Queue control structures */
int elan_sten_size; /**< sten packet len */
int elan_rdma_size; /**< qdma packet length */
int elan_qdma_size; /**< qdma packet length */
@ -63,11 +67,6 @@ struct mca_ptl_elan_t {
ompi_free_list_t elan_rdmas_free; /**< free elan rdma descriptors */
ompi_free_list_t elan_frags_free; /**< free elan rdma fragments */
#if MCA_PTL_ELAN_STATISTICS /* some statistics */
size_t ptl_bytes_sent;
size_t ptl_bytes_recv;
#endif
};
typedef struct mca_ptl_elan_t mca_ptl_elan_t;
extern mca_ptl_elan_t mca_ptl_elan;
@ -86,8 +85,9 @@ struct mca_ptl_elan_module_1_0_0_t {
/*
* We create our own simplified structure for managing elan state
* although libelan already provides one. We do not need
* all that tport, group structures.
* all those tport, group, atomic, shmem and NIC threads support.
*/
struct mca_ptl_elan_state_t *elan_ctrl;
struct mca_ptl_elan_t **elan_ptls; /**< array of available PTLs */
size_t elan_num_ptls; /**< number of ptls activated */

225
src/mca/ptl/elan/src/ptl_elan_comm_init.c Обычный файл
Просмотреть файл

@ -0,0 +1,225 @@
#include <signal.h>
#include <unistd.h>
#include <stdio.h>
#define _ELAN4
#define __elan4__
#include "ptl_elan.h"
#include "ptl_elan_priv.h"
/* Largest payload carried in one input-queue slot */
#define ELAN_QUEUE_MAX        INPUT_QUEUE_MAX
/* Extra slot added to the receive ring (see ompi_init_elan_qdma) */
#define ELAN_QUEUE_LOST_SLOTS 1
/* Alignment for receive slots and elan-side events */
#define SLOT_ALIGN            128
/* FIX: fully parenthesize the arguments so expansions such as
 * MAX(a | b, c) or MAX(x + 1, y) group correctly.  (The expansion still
 * evaluates each argument twice; do not pass expressions with side
 * effects.) */
#define MAX(a,b)              (((a) > (b)) ? (a) : (b))
/* Round x up to the next multiple of a; a must be a power of two.
 * NOTE(review): the (unsigned int) cast truncates on LP64 platforms if x
 * is ever a pointer — currently only sizes are passed. */
#define ALIGNUP(x,a)          (((unsigned int)(x) + ((a)-1)) & (-(a)))
/**
 * Pre-allocate the send descriptors (and their matching Elan-side
 * completion events) for one QDMA transmit queue and seed the queue's
 * free list with them.
 *
 * @param ptl    PTL instance providing the rail and Elan4 context.
 * @param queue  Queue control block whose tx_desc/tx_desc_free lists
 *               are constructed and populated here.
 * @return OMPI_SUCCESS, or OMPI_ERROR if either allocation fails
 *         (via OMPI_PTL_ELAN_CHECK_UNEX).
 */
static int
ompi_init_elan_queue_events (mca_ptl_elan_t *ptl,
                             ompi_ptl_elan_queue_ctrl_t *queue)
{
    int i;
    int count;
    int main_align, main_size;
    int elan_align, elan_size;
    RAIL *rail;
    ELAN4_CTX *ctx;
    ompi_free_list_t *flist;
    ompi_ptl_elan_queue_send_t *ptr;
    ompi_elan_event_t *elan_ptr;

    rail = (RAIL *) ptl->ptl_elan_rail;
    ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;
    flist = &queue->tx_desc_free;

    /* initialize list */
    OBJ_CONSTRUCT (&queue->tx_desc, ompi_list_t);
    OBJ_CONSTRUCT (&queue->tx_desc_free, ompi_free_list_t);

    /* Main-memory descriptors need 8-byte (DMA) alignment; elan-side
     * events need 128-byte alignment */
    main_align = MAX(sizeof(void *), 8);
    elan_align = MAX(sizeof(int *), 128);
    main_size = ALIGNUP(sizeof (ompi_ptl_elan_queue_send_t), main_align);
    /* NOTE(review): elan_size is computed from the main-memory descriptor
     * type, not ompi_elan_event_t — this oversizes the SDRAM allocation;
     * confirm whether ompi_elan_event_t was intended. */
    elan_size = ALIGNUP(sizeof (ompi_ptl_elan_queue_send_t), elan_align);

    /* Free-list bookkeeping is filled in by hand (no element class, no
     * memory pool: the slab below is carved up manually). */
    /* NOTE(review): fl_elem_size is set to the literal 128 rather than
     * main_size — confirm this is intentional. */
    flist->fl_elem_size =
    flist->fl_max_to_alloc = 128;
    flist->fl_num_allocated = 0;
    flist->fl_num_per_alloc = count = 16;
    flist->fl_elem_class = NULL;        /* leave it null */
    flist->fl_mpool = NULL;             /* leave it null */

    /* Allocate the elements: one contiguous slab of count+1 descriptors */
    ptr = (ompi_ptl_elan_queue_send_t*) elan4_allocMain(rail->r_alloc,
            main_align, main_size*(count + 1));
    OMPI_PTL_ELAN_CHECK_UNEX(ptr, NULL, OMPI_ERROR, 0);

    /* Allocating elan related structures (SDRAM events, one per descriptor) */
    elan_ptr = (ompi_elan_event_t *) elan4_allocElan(rail->r_alloc,
            elan_align, elan_size * (count + 1));
    OMPI_PTL_ELAN_CHECK_UNEX(elan_ptr, NULL, OMPI_ERROR, 0);

    for(i=0; i< flist->fl_num_per_alloc; i++) {
        ompi_list_item_t* item;

        /* Pair each main-memory descriptor with its SDRAM event and put
         * it on the free list */
        ptr->rail = rail;
        ptr->elan_data_event = elan_ptr;
        item = (ompi_list_item_t*)ptr;
        ompi_list_append(&flist->super, item);

        /* Initialize some of the dma structures */
        {
            ptr->main_dma.dma_dstAddr = 0;
            /* Source event fires locally when the DMA completes; the
             * destination event is the remote input queue's event */
            ptr->main_dma.dma_srcEvent = SDRAM2ELAN(ctx, &elan_ptr->event32);
            ptr->main_dma.dma_dstEvent = SDRAM2ELAN(ctx, queue->input);
            /* Tie the SDRAM event to a pollable word in main memory,
             * then arm it for one firing */
            INITEVENT_WORD(ctx, (EVENT *)&elan_ptr->event32,
                           &ptr->main_doneWord);
            RESETEVENT_WORD(&ptr->main_doneWord);
            PRIMEEVENT_WORD(ctx, (EVENT *)&elan_ptr->event32, 1);
        }

        /* Progress to the next element */
        ptr = (ompi_ptl_elan_queue_send_t*) ((char *) ptr + main_size);
        elan_ptr = (ompi_elan_event_t *) ((char *) elan_ptr + elan_size);
    }
    flist->fl_num_allocated += flist->fl_num_per_alloc;
    return OMPI_SUCCESS;
}
/* Statistics support is not implemented yet; succeed unconditionally. */
int
ompi_init_elan_stat (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
{
    return OMPI_SUCCESS;
}
/**
 * Set up the queue-based DMA (QDMA) send and receive machinery for each
 * rail owned by the module: the Elan input queue, transmit command queue,
 * send-descriptor free list, cookie pool and the receive slot ring.
 *
 * @param emp        Elan PTL module holding the per-rail PTL instances.
 * @param num_rails  Number of rails (and PTLs) to initialize.
 * @return OMPI_SUCCESS, or OMPI_ERROR on any allocation failure
 *         (via OMPI_PTL_ELAN_CHECK_UNEX).
 */
int
ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
{
    int i;
    int nslots = 128;
    int slotsize = 2048;
    RAIL *rail;
    ELAN4_CTX *ctx;
    struct mca_ptl_elan_t * ptl;

    /* FIX: account for the "lost" slot once, before the loop.  The
     * original code executed this inside the per-rail loop, so every
     * rail after the first silently got a progressively larger ring
     * (129, 130, ...) and inconsistent fptr/top/elitem bounds. */
    nslots += ELAN_QUEUE_LOST_SLOTS;

    /* Init the Transmit Queue structure */
    for ( i = 0 ; i < num_rails; i++ ) {
        ompi_ptl_elan_recv_queue_t *rxq;
        ompi_ptl_elan_queue_ctrl_t *queue;

        ptl = emp->elan_ptls[i];
        rail = (RAIL *) ptl->ptl_elan_rail;
        ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;

        queue = ptl->queue = (ompi_ptl_elan_queue_ctrl_t *)
            malloc(sizeof(ompi_ptl_elan_queue_ctrl_t));
        OMPI_PTL_ELAN_CHECK_UNEX(queue, NULL, OMPI_ERROR, 0);
        memset(queue, 0, sizeof(ompi_ptl_elan_queue_ctrl_t));

        /* Allocate input queue */
        queue->input = (E4_InputQueue *) elan4_allocElan(rail->r_alloc,
                INPUT_QUEUE_ALIGN, INPUT_QUEUE_SIZE);
        OMPI_PTL_ELAN_CHECK_UNEX(queue->input, NULL, OMPI_ERROR, 0);

        /* Command queue used to kick off DMAs/STEN packets for sends */
        queue->tx_cmdq = elan4_alloc_cmdq (ctx,
                rail->r_alloc,
                CQ_Size8K,
                CQ_WriteEnableBit | CQ_DmaStartEnableBit | CQ_STENEnableBit,
                NULL);
        OMPI_PTL_ELAN_CHECK_UNEX(queue->tx_cmdq, NULL, OMPI_ERROR, 0);

        /*
         * Elan4 has a rather complicated hierarchical event mechanism.
         * It is easy to use but nontrivial to manipulate
         * We implement a simpler event control mechanism, which
         * should also provide us the capability to chain event,
         * dma and IRQ etc but more open to update.
         *
         * Initialize a new event list managing this queue */
        ompi_init_elan_queue_events(ptl, queue);

        /* Allocate a cookie pool */
        queue->tx_cpool = elan4_allocCookiePool(ctx, ptl->elan_vp);

        /* Init the Receive Queue structure.
         * rx_nslots is the usable slot count; nslots (above) includes
         * the lost slot needed by the ring arithmetic. */
        queue->rx_nslots = 128;
        queue->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ?
            INPUT_QUEUE_MAX : slotsize;
        queue->rx_slotsize = ELAN_ALIGNUP(slotsize, SLOT_ALIGN);

        rxq = queue->rxq = (ompi_ptl_elan_recv_queue_t *)
            elan4_allocMain(rail->r_alloc, 64,
                    sizeof(ompi_ptl_elan_recv_queue_t));
        OMPI_PTL_ELAN_CHECK_UNEX(rxq, NULL, OMPI_ERROR, 0);
        memset(rxq,0,sizeof(ompi_ptl_elan_recv_queue_t));

        rxq->qr_rail = rail;
        rxq->qr_fptr = elan4_allocMain(rail->r_alloc,
                128, nslots * queue->rx_slotsize);
        OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_fptr, NULL, OMPI_ERROR, 0);
        /* 0xeb poison helps spot slots that were never written */
        memset(rxq->qr_fptr, 0xeb, nslots * queue->rx_slotsize);

        rxq->qr_elanDone = ALLOC_ELAN(rail, SLOT_ALIGN, sizeof(EVENT32));
        OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_elanDone, NULL, OMPI_ERROR, 0);

        /* Set the top et al */
        rxq->qr_efitem = (E4_uint64)elan4_main2elan(ctx, rxq->qr_fptr);
        rxq->qr_base = rxq->qr_fptr;
        rxq->qr_top = (void *)((uintptr_t)rxq->qr_base
                + (queue->rx_slotsize * (nslots-1)));
        rxq->qr_efptr = rxq->qr_efitem;
        rxq->qr_elitem = rxq->qr_efitem + (queue->rx_slotsize * (nslots-1));

        /* Event to wait/block on */
        rxq->qr_qEvent = &rxq->qr_elanDone;

        /* Publish ring geometry to the hardware input queue */
        queue->input->q_event = SDRAM2ELAN(ctx, (void *)&rxq->qr_elanDone);
        queue->input->q_fptr = rxq->qr_efitem;
        queue->input->q_bptr = rxq->qr_efitem;
        queue->input->q_control = E4_InputQueueControl(
                rxq->qr_efitem, rxq->qr_elitem, queue->rx_slotsize);

        /* The event: tie the queue event to a pollable main-memory word
         * and arm it for one arrival */
        INITEVENT_WORD(ctx, (EVENT *)&rxq->qr_elanDone,
                &rxq->qr_doneWord);
        RESETEVENT_WORD(&rxq->qr_doneWord);
        PRIMEEVENT_WORD(ctx, (EVENT *)&rxq->qr_elanDone, 1);

        rxq->qr_cmdq = elan4_alloc_cmdq(
                ctx, rail->r_alloc,
                CQ_Size1K, CQ_WriteEnableBit | CQ_WaitEventEnableBit,
                NULL);
        OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_cmdq, NULL, OMPI_ERROR, 0);

        /* Allocate a sleepDesc for threads to block on */
        rxq->qr_es = ompi_init_elan_sleepdesc(&mca_ptl_elan_global_state,
                rxq->qr_rail);
        OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_es, NULL, OMPI_ERROR, 0);
        OBJ_CONSTRUCT (&queue->rx_lock, ompi_mutex_t);
    }
    return (OMPI_SUCCESS);
}
/* RDMA support is not implemented yet (see the commit TODO list);
 * succeed unconditionally so setup can proceed. */
int ompi_init_elan_rdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
{
    return OMPI_SUCCESS;
}
/* STEN packet support is not implemented yet; succeed unconditionally. */
int ompi_init_elan_sten (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
{
    return OMPI_SUCCESS;
}

Просмотреть файл

@ -71,7 +71,7 @@ mca_ptl_elan_recv_frag_handler (mca_ptl_elan_recv_frag_t *, int sd);
void
mca_ptl_elan_recv_frag_init (mca_ptl_elan_recv_frag_t * frag,
struct mca_ptl_elan_peer_t *peer);
struct mca_ptl_elan_peer_t *peer);
bool
mca_ptl_elan_recv_frag_send_ack (mca_ptl_elan_recv_frag_t * frag);
@ -91,7 +91,7 @@ mca_ptl_elan_send_frag_progress (mca_ptl_elan_send_frag_t * frag)
/*
 * Initialize an ACK fragment for a received fragment (stub; not
 * implemented yet).
 *
 * FIX: the diff showed a duplicated ptl_peer parameter line whose new
 * version misspelled the type as "mca_ptl_elan_peer_T" — a distinct,
 * undeclared struct tag in C.  Collapsed to the single correct
 * declaration.
 */
static inline void
mca_ptl_elan_send_frag_init_ack (mca_ptl_elan_send_frag_t * ack,
                                 struct mca_ptl_t *ptl,
                                 struct mca_ptl_elan_peer_t *ptl_peer,
                                 mca_ptl_elan_recv_frag_t * frag)
{
    return;
}
Просмотреть файл

@ -9,351 +9,13 @@
#include "ptl_elan.h"
#include "ptl_elan_priv.h"
#define ELAN_QUEUE_MAX INPUT_QUEUE_MAX
#define ELAN_QUEUE_LOST_SLOTS 1
#define SLOT_ALIGN 128
mca_ptl_elan_state_t mca_ptl_elan_global_state;
static int ompi_elan_attach_network (mca_ptl_elan_state_t * ems);
static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems);
static int ompi_init_elan_qdma (mca_ptl_elan_state_t * ems);
static int ompi_init_elan_sten (mca_ptl_elan_state_t * ems);
static int ompi_init_elan_rdma (mca_ptl_elan_state_t * ems);
static int ompi_init_elan_stat (mca_ptl_elan_state_t * ems);
/* Accessory functions to deallocate the memory */
static void ompi_module_elan_close_ptls (mca_ptl_elan_state_t * ems);
static void ompi_module_elan_close_procs (mca_ptl_elan_state_t * ems);
int
ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
{
int i;
int *rails;
int num_rails;
int alloc_mainsize;
int alloc_mainbase;
int alloc_elansize;
int alloc_elanbase;
mca_ptl_elan_state_t *ems;
ems = &mca_ptl_elan_global_state;
ems->elan_module = emp;
/* Initialise enough of state so we can call elan_exception() */
ems->elan_version = ELAN_VERSION;
ems->elan_ctx = NULL;
ems->elan_rail = NULL;
ems->elan_vp = ELAN_INVALID_PROCESS;
ems->elan_nvp = 0;
ems->elan_debug = 0;
ems->elan_traced = 0;
ems->elan_pagesize = sysconf (_SC_PAGESIZE);
ems->elan_pid = getpid ();
/* Default allocator parameters */
ems->elan_flags = 0;
ems->main_size = ELAN_ALLOC_SIZE;
ems->elan_size = ELAN_ALLOCELAN_SIZE;
ems->elan_flags |= (EXCEPTIONCORE | EXCEPTIONTRACE | EXCEPTIONDBGDUMP);
ems->elan_debugfile = (FILE *) NULL;
ems->elan_signalnum = SIGABRT;
#ifdef ELAN_VERSION
if (!elan_checkVersion (ELAN_VERSION)) {
return OMPI_ERROR;
}
#endif
/* Allocate elan capability from the heap */
ems->elan_cap = (ELAN_CAPABILITY *) malloc (sizeof (ELAN_CAPABILITY));
if (NULL == ems->elan_cap) {
ompi_output (0,
"[%s:%d] error in allocating memory for elan capability \n",
__FILE__, __LINE__);
return OMPI_ERROR;
} else {
memset (ems->elan_cap, 0, sizeof (ELAN_CAPABILITY));
}
/* Process the capability info supplied by RMS */
if (getenv ("ELAN_AUTO") || getenv ("RMS_NPROCS")) {
/* RMS generated capabilities */
if (rms_getcap (0, ems->elan_cap)) {
ompi_output (0,
"[%s:%d] error in gettting elan capability \n",
__FILE__, __LINE__);
return OMPI_ERROR;
}
}
if ((num_rails = ems->elan_nrails = elan_nrails (ems->elan_cap)) <= 0) {
ompi_output (0,
"[%s:%d] error in gettting number of rails \n",
__FILE__, __LINE__);
return OMPI_ERROR;
}
ems->all_rails = (RAIL **) malloc (sizeof (RAIL *) * num_rails);
if (ems->all_rails == NULL) {
ompi_output (0,
"[%s:%d] error in allocating memory for all_rails\n",
__FILE__, __LINE__);
return OMPI_ERR_OUT_OF_RESOURCE;
}
ems->all_estates = (ADDR_SDRAM *)
malloc (sizeof (ELAN_ESTATE *) * num_rails);
if (ems->all_estates == NULL) {
ompi_output (0,
"[%s:%d] error in allocating memory for all_estates\n",
__FILE__, __LINE__);
return OMPI_ERR_OUT_OF_RESOURCE;
}
if (NULL == (rails = (int *) malloc (sizeof (int) * num_rails))) {
ompi_output (0,
"[%s:%d] error in allocating memory \n",
__FILE__, __LINE__);
return OMPI_ERR_OUT_OF_RESOURCE;
}
(void) elan_rails (ems->elan_cap, rails);
if (NULL == (ems->elan_rail = (ELAN_RAIL **)
malloc (sizeof (ELAN_RAIL **) * (num_rails + 1)))) {
ompi_output (0,
"[%s:%d] error in allocating memory for elan_rail \n",
__FILE__, __LINE__);
return OMPI_ERR_OUT_OF_RESOURCE;
}
ems->elan_rail[num_rails] = NULL;
alloc_mainsize = ELAN_ALIGNUP (ems->main_size, ems->elan_pagesize);
alloc_mainbase = (ADDR_ELAN) ((uintptr_t) ems->main_base);
alloc_elansize = ELAN_ALIGNUP (ems->elan_size, ems->elan_pagesize);
alloc_elanbase = (ADDR_ELAN) ((uintptr_t) ems->elan_base);
/* Magic quadrics number for the starting cookie value */
ems->intcookie = 42;
for (i = 0; i < num_rails; i++) {
RAIL *rail;
ELAN_ESTATE *estate;
ELAN_EPRIVSTATE *priv_estate;
/* Allocate the Main memory control structure for this rail */
if (NULL == (rail = ems->all_rails[i] =
(RAIL *) malloc (sizeof (RAIL)))) {
ompi_output (0,
"[%s:%d] error in malloc for all_rails[i]\n",
__FILE__, __LINE__);
return OMPI_ERROR;
}
memset (rail, 0, sizeof (RAIL));
if (NULL == (rail->r_ctx = elan4_init (rails[i]))) {
ompi_output (0,
"[%s:%d] error in initializing rail %d \n",
__FILE__, __LINE__, rails[i]);
return OMPI_ERROR;
}
if (NULL == (rail->r_sdram = elan4_open_sdram (rails[i],
0, alloc_elansize)))
{
ompi_output (0,
"[%s:%d] error opening sdram for rail %d \n",
__FILE__, __LINE__, rails[i]);
return OMPI_ERROR;
}
if (NULL == (rail->r_alloc =
elan4_createAllocator (ems->main_size,
rail->r_sdram, 0,
ems->elan_size))) {
ompi_output (0,
"[%s:%d] error creating allocator for rail %d \n",
__FILE__, __LINE__, rails[i]);
return OMPI_ERROR;
}
if (elan4_set_standard_mappings (rail->r_ctx) < 0
|| elan4_set_required_mappings (rail->r_ctx) < 0) {
ompi_output (0,
"[%s:%d] error setting memory mapping for rail %d \n",
__FILE__, __LINE__, rails[i]);
return OMPI_ERROR;
}
/* Now allocate the SDRAM Elan control structure for this rail */
if (NULL == (estate = ems->all_estates[i] =
elan4_allocElan (rail->r_alloc, ELAN_ALIGN,
sizeof (ELAN_EPRIVSTATE)))) {
ompi_output (0,
"[%s:%d] error in allocating memory "
"for estate from rail %d \n",
__FILE__, __LINE__, rails[i]);
return OMPI_ERROR;
}
priv_estate = (ELAN_EPRIVSTATE *) estate;
memset (priv_estate, 0, sizeof (ELAN_EPRIVSTATE));
/* Allocate a command port for non sten functions etc */
if (NULL == (rail->r_cmdq = elan4_alloc_cmdq (rail->r_ctx,
rail->r_alloc,
CQ_Size8K,
CQ_ModifyEnableBit |
CQ_WriteEnableBit |
CQ_WaitEventEnableBit
|
CQ_SetEventEnableBit
|
CQ_ThreadStartEnableBit,
NULL))) {
ompi_output (0,
"[%s:%d] error in allocating command port "
"for rail %d \n", __FILE__, __LINE__, rails[i]);
return OMPI_ERROR;
}
/* Allocate a command port for thread rescheduling etc */
if (NULL == (rail->r_ecmdq = elan4_alloc_cmdq (rail->r_ctx,
rail->r_alloc,
CQ_Size8K,
CQ_EnableAllBits,
NULL))) {
ompi_output (0,
"[%s:%d] error in allocating thread command port "
"for rail %d \n", __FILE__, __LINE__, rails[i]);
return OMPI_ERROR;
}
priv_estate->cport = MAIN2ELAN (rail->r_ctx,
rail->r_ecmdq->cmdq_mapping);
/* save the rail pointers */
ems->elan_rail[i] = (ELAN_RAIL *) rail;
estate->alloc = rail->r_alloc;
estate->vp = ems->elan_vp;
estate->debugFlags = ems->elan_flags;
estate->debugFd = 1;
priv_estate->pageSize = ems->elan_pagesize;
rail->r_estate = estate;
rail->r_railNo = rails[i];
{
/*ompi_elan_railtable_t *rt; */
struct railtable *rt;
if (NULL == (rt = (struct railtable *)
malloc (sizeof (struct railtable)))) {
ompi_output (0,
"[%s:%d] error in allocating memory for railTable \n"
__FILE__, __LINE__);
return OMPI_ERROR;
}
memset (rt, 0, sizeof (struct railtable));
rt->rt_nrails = 1;
rt->rt_rail = 0;
rt->rt_railReal = i;
rt->rt_allRails = (RAIL **) & (ems->all_rails[i]);
rail->r_railTable = rt;
}
} /* for each rail */
/* Free the local variable */
free (rails);
ems->elan_ctx = ems->elan_rail[0]->rail_ctx;
ems->elan_estate = (void *) ems->all_estates[0];
#if 0
/* Leave the junky code here to remind me later */
_elan_eventInit (privState);
elan_setDebugHandler (state, (ELAN_DBGH) _elan_allocDbg, state);
atexit (_elan_atExitCallBack);
#endif
ompi_elan_attach_network (ems);
/* Set the rms_resourceId */
if (rms_getprgid (getpid (), &ems->elan_rmsid) < 0) {
ems->elan_rmsid = -1;
}
/* Now open ourselves to the network */
for (i = 0; ems->elan_rail[i]; i++) {
elan4_block_inputter (ems->elan_rail[i]->rail_ctx, 0);
}
/* setup communication infrastructure and construct PTL's */
if (OMPI_SUCCESS != ompi_mca_ptl_elan_setup (ems)) {
ompi_output (0,
"[%s:%d] error in setting up elan "
"communication state machines for elan PTL's.\n",
__FILE__, __LINE__);
return OMPI_ERROR;
}
return (OMPI_SUCCESS);
}
int
ompi_mca_ptl_elan_finalize (
mca_ptl_elan_module_1_0_0_t * emp)
{
int i;
int num_rails;
mca_ptl_elan_state_t *ems;
ems = &mca_ptl_elan_global_state;
ompi_module_elan_close_ptls (&mca_ptl_elan_global_state);
ompi_module_elan_close_procs (&mca_ptl_elan_global_state);
/* Cleanup the global state
* Free per rail structures, then the references to them */
num_rails = ems->elan_nrails;
for (i = 0; i < num_rails; i++) {
RAIL *rail;
rail = ems->all_rails[i];
free (rail->r_railTable);
/* Free the memory from the rail allocator */
elan4_freeMain(rail->r_alloc, rail->r_ecmdq);
elan4_freeMain(rail->r_alloc, rail->r_cmdq);
elan4_freeElan(rail->r_alloc, ems->all_estates[i]);
/* Destroy allocator and SDRAM handler and then close device */
elan4_destroyAllocator (rail->r_alloc);
elan4_close_sdram (rail->r_sdram);
elan4_fini (rail->r_ctx);
/* Free the rail structure, why two pointers are used to
* point to the same RAIL, all_rails and elan_rails */
/*free (ems->elan_rail[i]);*/
free (ems->all_rails[i]);
}
free(ems->elan_rail);
free(ems->all_estates);
free(ems->all_rails);
free(ems->elan_cap);
return (OMPI_SUCCESS);
}
static int
ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
{
/* TODO:
* a) create elan PTL instances
@ -392,9 +54,11 @@ ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
memcpy(ptl, &mca_ptl_elan, sizeof(mca_ptl_elan));
emp->elan_ptls[emp->elan_num_ptls] = ptl;
/* MCA related structures */
ptl->ptl_ni_local = emp->elan_num_ptls;
ptl->ptl_ni_total = rail_count;
emp->elan_num_ptls ++;
/* allow user to specify per rail bandwidth and latency */
@ -404,24 +68,29 @@ ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
sprintf(param, "latency_elanrail%d", emp->elan_num_ptls);
ptl->super.ptl_latency = mca_ptl_elan_param_register_int(param, 1);
/* Setup elan related structures such as ctx, rail */
ptl->ptl_elan_rail = ems->elan_rail[rail_count];
ptl->ptl_elan_ctx = ems->elan_rail[rail_count]->rail_ctx;
ptl->elan_vp = ems->elan_vp;
ptl->elan_nvp = ems->elan_nvp;
} while (emp->elan_num_ptls < rail_count);
/* Allocating all the communication strcutures for PTL's,
* XXX: Leave it later after finalization is done
*/
if (OMPI_SUCCESS != ompi_init_elan_qdma (ems)) {
if (OMPI_SUCCESS != ompi_init_elan_qdma (emp, rail_count)) {
return OMPI_ERROR;
}
if (OMPI_SUCCESS != ompi_init_elan_rdma (ems)) {
if (OMPI_SUCCESS != ompi_init_elan_rdma (emp, rail_count)) {
return OMPI_ERROR;
}
if (OMPI_SUCCESS != ompi_init_elan_sten (ems)) {
if (OMPI_SUCCESS != ompi_init_elan_sten (emp, rail_count)) {
return OMPI_ERROR;
}
if (OMPI_SUCCESS != ompi_init_elan_stat (ems)) {
if (OMPI_SUCCESS != ompi_init_elan_stat (emp, rail_count)) {
return OMPI_ERROR;
}
@ -564,36 +233,357 @@ ompi_elan_attach_network (mca_ptl_elan_state_t * ems)
return (OMPI_SUCCESS);
}
static int
ompi_init_elan_qdma (mca_ptl_elan_state_t * mp)
{
return (OMPI_SUCCESS);
}
static int
ompi_init_elan_rdma (mca_ptl_elan_state_t * mp)
{
return (OMPI_SUCCESS);
}
static int
ompi_init_elan_sten (mca_ptl_elan_state_t * mp)
{
return (OMPI_SUCCESS);
}
static int
ompi_init_elan_stat (mca_ptl_elan_state_t * mp)
{
return (OMPI_SUCCESS);
}
static void
ompi_module_elan_close_ptls (mca_ptl_elan_state_t * ems) {
ompi_module_elan_close_ptls (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
{
}
static void
ompi_module_elan_close_procs (mca_ptl_elan_state_t * ems) {
ompi_module_elan_close_procs (mca_ptl_elan_module_1_0_0_t* emp, int num_rails)
{
}
static void ompi_init_elan_queue_events(ompi_ptl_elan_queue_ctrl_t *queue)
{
}
/**
 * Allocate and initialize an ELAN_SLEEP descriptor: an IRQ cookie plus
 * the pair of command queues a blocked thread needs to wait on and to
 * fire the wakeup interrupt.
 *
 * @param ems   Global elan state (supplies the cookie counter and rail[0]).
 * @param rail  Rail whose allocator/context back the command queues.
 * @return the new descriptor, or NULL on any allocation failure
 *         (via OMPI_PTL_ELAN_CHECK_UNEX).
 *
 * NOTE(review): on a failed sub-allocation the already-allocated pieces
 * (es, es_cmdBlk, es_cmdq) are leaked — confirm whether cleanup is needed.
 */
ELAN_SLEEP *
ompi_init_elan_sleepdesc(mca_ptl_elan_state_t * ems, RAIL *rail)
{
    ELAN_SLEEP *es;

    /* XXX: asking the caller to hold the lock */
    es = MALLOC(sizeof(ELAN_SLEEP));
    OMPI_PTL_ELAN_CHECK_UNEX(es, NULL, NULL, 0);
    memset(es, 0, sizeof(ELAN_SLEEP));

    /* Assign next interrupt cookie value */
    es->es_cookie = ems->intcookie++;

    /* XXX, rail[0] is choosen instead this rail */
    if (elan4_alloc_intcookie(ems->elan_rail[0]->rail_ctx,
                              es->es_cookie) < 0) {
        /* Failure is only logged, not propagated */
        ompi_output(0,
                    "[%s:%d] Failed to allocate IRQ cookie \n",
                    __FILE__, __LINE__);
    }

    es->es_cmdBlk = ALLOC_ELAN(rail, E4_EVENTBLOCK_SIZE,
                               E4_EVENTBLOCK_SIZE);
    OMPI_PTL_ELAN_CHECK_UNEX(es->es_cmdBlk, 0, NULL, 0);

    /*Allocate a pair of command queues for blocking waits with*/
    es->es_cmdq = elan4_alloc_cmdq(rail->r_ctx, rail->r_alloc,
            CQ_Size1K, CQ_WriteEnableBit | CQ_WaitEventEnableBit,
            NULL);
    OMPI_PTL_ELAN_CHECK_UNEX(es->es_cmdq, NULL, NULL, 0);

    /* This command queue used to fire the IRQ via
       a cmd port copy event */
    es->es_ecmdq = elan4_alloc_cmdq(rail->r_ctx,
            rail->r_alloc, CQ_Size1K, /* CQ_EnableAllBits, */
            CQ_WriteEnableBit | CQ_InterruptEnableBit, NULL);
    OMPI_PTL_ELAN_CHECK_UNEX(es->es_ecmdq, NULL, NULL, 0);
    es->es_next = NULL;

    /* XXX: asking the caller to release the lock */
    return es;
}
/**
 * Initialize the global Elan state: read the RMS-supplied capability,
 * open every rail (device context, SDRAM, allocator, command queues,
 * SDRAM estate, sleep descriptor, rail table), attach to the network
 * and finally build the per-rail PTL instances via
 * ompi_mca_ptl_elan_setup().
 *
 * @param emp  The Elan PTL module to hook up to the global state.
 * @return OMPI_SUCCESS, OMPI_ERROR or OMPI_ERR_OUT_OF_RESOURCE.
 */
int
ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp)
{
    int i;
    int *rails;
    int num_rails;
    int alloc_mainsize;
    int alloc_mainbase;
    int alloc_elansize;
    int alloc_elanbase;
    mca_ptl_elan_state_t *ems;

    ems = &mca_ptl_elan_global_state;

    /* Hook two of them togther */
    ems->elan_module = emp;
    emp->elan_ctrl = ems;

    /* Initialise enough of state so we can call elan_exception() */
    ems->elan_version = ELAN_VERSION;
    ems->elan_ctx = NULL;
    ems->elan_rail = NULL;
    ems->elan_vp = ELAN_INVALID_PROCESS;
    ems->elan_nvp = 0;
    ems->elan_debug = 0;
    ems->elan_traced = 0;
    ems->elan_pagesize = sysconf (_SC_PAGESIZE);
    ems->elan_pid = getpid ();

    /* Default allocator parameters */
    ems->elan_flags = 0;
    ems->elan_waittype = ELAN_POLL_EVENT;       /* or ELAN_WAIT_EVENT */
    ems->main_size = ELAN_ALLOC_SIZE;
    ems->elan_size = ELAN_ALLOCELAN_SIZE;
    ems->elan_flags |= (EXCEPTIONCORE | EXCEPTIONTRACE | EXCEPTIONDBGDUMP);
    ems->elan_debugfile = (FILE *) NULL;
    ems->elan_signalnum = SIGABRT;

#ifdef ELAN_VERSION
    if (!elan_checkVersion (ELAN_VERSION)) {
        ompi_output (0,
                     "Elan version is not compatible with %s \n",
                     ELAN_VERSION);
        return OMPI_ERROR;
    }
#endif

    /* Allocate elan capability from the heap */
    ems->elan_cap = (ELAN_CAPABILITY *) malloc (sizeof (ELAN_CAPABILITY));
    OMPI_PTL_ELAN_CHECK_UNEX(ems->elan_cap, NULL, OMPI_ERROR, 0);
    memset (ems->elan_cap, 0, sizeof (ELAN_CAPABILITY));

    /* Process the capability info supplied by RMS */
    if (getenv ("ELAN_AUTO") || getenv ("RMS_NPROCS")) {
        /* RMS generated capabilities */
        if (rms_getcap (0, ems->elan_cap)) {
            ompi_output (0,
                         "[%s:%d] error in gettting elan capability \n",
                         __FILE__, __LINE__);
            return OMPI_ERROR;
        }
    }

    if ((num_rails = ems->elan_nrails = elan_nrails (ems->elan_cap)) <= 0) {
        ompi_output (0,
                     "[%s:%d] error in gettting number of rails \n",
                     __FILE__, __LINE__);
        return OMPI_ERROR;
    }

    ems->all_rails = (RAIL **) malloc (sizeof (RAIL *) * num_rails);
    OMPI_PTL_ELAN_CHECK_UNEX(ems->all_rails, NULL,
                             OMPI_ERR_OUT_OF_RESOURCE, 0);

    ems->all_estates = (ADDR_SDRAM *)
        malloc (sizeof (ELAN_ESTATE *) * num_rails);
    OMPI_PTL_ELAN_CHECK_UNEX(ems->all_estates, NULL,
                             OMPI_ERR_OUT_OF_RESOURCE, 0);

    rails = (int *) malloc (sizeof (int) * num_rails);
    OMPI_PTL_ELAN_CHECK_UNEX(rails, NULL, OMPI_ERR_OUT_OF_RESOURCE, 0);
    (void) elan_rails (ems->elan_cap, rails);

    /* NULL-terminated array of rail pointers */
    ems->elan_rail = (ELAN_RAIL **) malloc (sizeof (ELAN_RAIL **)
                                            * (num_rails + 1));
    OMPI_PTL_ELAN_CHECK_UNEX(ems->elan_rail, NULL,
                             OMPI_ERR_OUT_OF_RESOURCE, 0);
    ems->elan_rail[num_rails] = NULL;

    alloc_mainsize = ELAN_ALIGNUP (ems->main_size, ems->elan_pagesize);
    alloc_mainbase = (ADDR_ELAN) ((uintptr_t) ems->main_base);
    alloc_elansize = ELAN_ALIGNUP (ems->elan_size, ems->elan_pagesize);
    alloc_elanbase = (ADDR_ELAN) ((uintptr_t) ems->elan_base);

    /* Magic quadrics number for the starting cookie value */
    ems->intcookie = 42;
    ems->rail_intcookie = (int*) malloc (sizeof (int)*(num_rails + 1));
    OMPI_PTL_ELAN_CHECK_UNEX(ems->rail_intcookie, NULL,
                             OMPI_ERR_OUT_OF_RESOURCE, 0);
    /* FIX: zero the freshly allocated cookie array.  The original code
     * memset ems->elan_cap here with this size, wiping out the
     * capability that rms_getcap() had just filled in. */
    memset (ems->rail_intcookie, 0, (num_rails + 1) * sizeof (int));
    /* FIX: the terminator entry is an int, not a pointer (was "= NULL") */
    ems->rail_intcookie[num_rails] = 0;

    for (i = 0; i < num_rails; i++) {
        RAIL *rail;
        ELAN_ESTATE *estate;
        ELAN_EPRIVSTATE *priv_estate;
        ELAN_SLEEP *es;

        /* Allocate the Main memory control structure for this rail */
        rail = ems->all_rails[i] = (RAIL *) malloc (sizeof (RAIL));
        OMPI_PTL_ELAN_CHECK_UNEX(rail, NULL, OMPI_ERROR, 0);
        memset (rail, 0, sizeof (RAIL));

        rail->r_ctx = elan4_init (rails[i]);
        OMPI_PTL_ELAN_CHECK_UNEX(rail->r_ctx, NULL, OMPI_ERROR, 0);

        rail->r_sdram = elan4_open_sdram (rails[i], 0, alloc_elansize);
        OMPI_PTL_ELAN_CHECK_UNEX(rail->r_sdram, NULL, OMPI_ERROR, 0);

        rail->r_alloc = elan4_createAllocator (ems->main_size,
                                               rail->r_sdram, 0, ems->elan_size);
        OMPI_PTL_ELAN_CHECK_UNEX(rail->r_alloc, NULL, OMPI_ERROR, 0);

        if (elan4_set_standard_mappings (rail->r_ctx) < 0
            || elan4_set_required_mappings (rail->r_ctx) < 0) {
            ompi_output (0,
                         "[%s:%d] error setting memory mapping for rail %d \n",
                         __FILE__, __LINE__, rails[i]);
            return OMPI_ERROR;
        }

#if 0 /* Is this only needed for TPORT support? */
        if (elan4_register_trap_handler(rail->r_ctx, UTS_UNIMP_INSTR,
                UTS_TPROC, elan_unimp_handler, NULL) < 0) {
            ompi_output(0, "elan_init(%d): Failed elan4_register_unimp()", i);
            return OMPI_ERROR;
        }
#endif

        /* Now allocate the SDRAM Elan control structure for this rail */
        estate = ems->all_estates[i] = elan4_allocElan (rail->r_alloc,
                ELAN_ALIGN, sizeof (ELAN_EPRIVSTATE));
        OMPI_PTL_ELAN_CHECK_UNEX(estate, NULL, OMPI_ERROR, 0);

        priv_estate = (ELAN_EPRIVSTATE *) estate;
        memset (priv_estate, 0, sizeof (ELAN_EPRIVSTATE));

        /* Allocate a command port for non sten functions etc */
        rail->r_cmdq = elan4_alloc_cmdq (rail->r_ctx,
                rail->r_alloc,
                CQ_Size8K,
                CQ_ModifyEnableBit | CQ_WriteEnableBit | CQ_WaitEventEnableBit
                | CQ_SetEventEnableBit | CQ_ThreadStartEnableBit,
                NULL);
        OMPI_PTL_ELAN_CHECK_UNEX(rail->r_cmdq, NULL, OMPI_ERROR, 0);

        /* Allocate a command port for thread rescheduling etc */
        rail->r_ecmdq = elan4_alloc_cmdq (rail->r_ctx, rail->r_alloc,
                CQ_Size8K, CQ_EnableAllBits, NULL);
        OMPI_PTL_ELAN_CHECK_UNEX(rail->r_ecmdq, NULL, OMPI_ERROR, 0);

        priv_estate->cport = MAIN2ELAN (rail->r_ctx,
                                        rail->r_ecmdq->cmdq_mapping);

        /* save the rail pointers */
        ems->elan_rail[i] = (ELAN_RAIL *) rail;
        /* Record the cookie this rail's sleep descriptor will take
         * (ompi_init_elan_sleepdesc consumes intcookie next) */
        ems->rail_intcookie[i] = ems->intcookie;

        /* Allocate a Sleep Desc */
        es = ompi_init_elan_sleepdesc(ems, rail);
        /* XXX: put a lock and hold a lock */
        es->es_next = rail->r_sleepDescs;
        rail->r_sleepDescs = es;
        /* XXX: release the lock */

        estate->alloc = rail->r_alloc;
        estate->vp = ems->elan_vp;
        estate->debugFlags = ems->elan_flags;
        estate->debugFd = 1;
        priv_estate->pageSize = ems->elan_pagesize;
        rail->r_estate = estate;
        rail->r_railNo = rails[i];

        {
            /*ompi_elan_railtable_t *rt; */
            struct railtable *rt;
            rt = (struct railtable *) malloc (sizeof (struct railtable));
            OMPI_PTL_ELAN_CHECK_UNEX(rt, NULL, OMPI_ERROR, 0);
            memset (rt, 0, sizeof (struct railtable));
            rt->rt_nrails = 1;
            rt->rt_rail = 0;
            rt->rt_railReal = i;
            rt->rt_allRails = (RAIL **) & (ems->all_rails[i]);
            rail->r_railTable = rt;
        }
    }                           /* for each rail */

    /* Free the local variable */
    free (rails);

    ems->elan_ctx = ems->elan_rail[0]->rail_ctx;
    ems->elan_estate = (void *) ems->all_estates[0];

    /* XXX: Initialize a list of null events here */

    /* Attach to the device and open to the network */
    ompi_elan_attach_network (ems);

    /* Set the rms_resourceId */
    if (rms_getprgid (getpid (), &ems->elan_rmsid) < 0) {
        ems->elan_rmsid = -1;
    }

    /* Now open ourselves to the network */
    for (i = 0; ems->elan_rail[i]; i++) {
        elan4_block_inputter (ems->elan_rail[i]->rail_ctx, 0);
    }

    /* setup communication infrastructure and construct PTL's */
    if (OMPI_SUCCESS != ompi_mca_ptl_elan_setup (ems)) {
        ompi_output (0,
                     "[%s:%d] error in setting up elan "
                     "communication state machines for elan PTL's.\n",
                     __FILE__, __LINE__);
        return OMPI_ERROR;
    }

    return (OMPI_SUCCESS);
}
/**
 * Tear down the Elan PTL module: close PTLs/procs, then release every
 * per-rail resource (command queues, estate, IRQ cookie, allocator,
 * SDRAM handle) and finally the global arrays.
 *
 * @param emp  The Elan PTL module being finalized.
 * @return OMPI_SUCCESS.
 */
int
ompi_mca_ptl_elan_finalize (mca_ptl_elan_module_1_0_0_t * emp)
{
    int i;
    int num_rails;
    mca_ptl_elan_state_t *ems;
    ems = &mca_ptl_elan_global_state;
    num_rails = ems->elan_nrails;
    ompi_module_elan_close_ptls (emp, num_rails);
    ompi_module_elan_close_procs (emp, num_rails);
    /* Cleanup the global state
     * Free per rail structures, then the references to them */
    for (i = 0; i < num_rails; i++) {
        RAIL *rail;
        rail = ems->all_rails[i];
        free (rail->r_railTable);
        /* Free the memory from the rail allocator.
         * NOTE(review): r_ecmdq/r_cmdq came from elan4_alloc_cmdq — confirm
         * elan4_freeMain is the matching release call. */
        elan4_freeMain(rail->r_alloc, rail->r_ecmdq);
        elan4_freeMain(rail->r_alloc, rail->r_cmdq);
        elan4_freeElan(rail->r_alloc, ems->all_estates[i]);
        /* Free cookie value, Destroy allocator and SDRAM handler and
           then close device */
        /* Since the cookie allocated from rail[0], be consistent here */
        elan4_free_intcookie (ems->all_rails[0]->r_ctx,
                              ems->rail_intcookie[i]);
        elan4_destroyAllocator (rail->r_alloc);
        elan4_close_sdram (rail->r_sdram);
        /*elan4_fini (rail->r_ctx); Not working yet */
        /* Free the rail structure, why two pointers are used to
         * point to the same RAIL, all_rails and elan_rails */
        /*free (ems->elan_rail[i]);*/
        free (ems->all_rails[i]);
    }
    free(ems->elan_rail);
    free(ems->all_estates);
    free(ems->all_rails);
    free(ems->elan_cap);
    return (OMPI_SUCCESS);
}

Просмотреть файл

@ -179,20 +179,7 @@ mca_ptl_elan_module_init (int *num_ptls,
*allow_multi_user_threads = true;
*have_hidden_threads = OMPI_HAVE_THREADS;
/* need to set ompi_using_threads() as ompi_event_init()
* will spawn a thread if supported */
if(OMPI_HAVE_THREADS)
ompi_set_using_threads(true);
/* duplicated actions are avoid with a static variable,
* inited in ompi_event_init() */
if ((rc = ompi_event_init ()) != OMPI_SUCCESS) {
ompi_output(0,
"[%s:%d] mca_ptl_elan_module_init: "
"unable to initialize event dispatch thread: %d\n",
__FILE__, __LINE__, rc);
return NULL;
}
/* Leave the thread related setting to PML:PTL(TCP) to decide */
/* initialize free lists */
ompi_free_list_init (&(elan_mp->elan_reqs_free),
@ -218,6 +205,7 @@ mca_ptl_elan_module_init (int *num_ptls,
__FILE__, __LINE__);
return NULL;
}
/*
* (mca_ptl_elan_module_exchange () != OMPI_SUCCESS)
*

Просмотреть файл

@ -36,9 +36,91 @@
#include <elan/init.h>
#include <rms/rmscall.h>
#include "misc_sys.h"
#include "init_sys.h"
#include "elan4/events.h"
/*
 * Bail out of the enclosing function with 'err' when 'value' equals the
 * unexpected result 'unexp' (typically a failed allocation), after
 * logging the location.  Expands to a return statement — use only where
 * returning 'err' is valid.
 *
 * FIX: arguments are now parenthesized so operands such as "x & mask"
 * compare correctly ("value == unexp" bound tighter than & before).
 * The error parameter was also renamed from "errno" to "err" to avoid
 * clashing with the <errno.h> macro; call sites are unaffected.
 */
#define OMPI_PTL_ELAN_CHECK_UNEX(value, unexp, err, output)        \
do {                                                               \
    if ((value) == (unexp)) {                                      \
        ompi_output(output,                                        \
                "[%s:%d] received unexpect allocated value \n",    \
                __FILE__, __LINE__);                               \
        return (err);                                              \
    }                                                              \
} while (0)
/**
 * Per-rail receive side of a QDMA queue: the slot ring in main memory,
 * its Elan-visible addresses, the completion event and the command
 * queue / sleep descriptor used to wait for arrivals.
 * (Populated by ompi_init_elan_qdma().)
 */
struct ompi_ptl_elan_recv_queue_t
{
    /* Events needs to be aligned */
    EVENT_WORD qr_doneWord;     /**< main-memory word polled for completion */
    ADDR_SDRAM qr_qEvent;       /**< event to wait/block on */
    EVENT32 *qr_elanDone;       /**< SDRAM completion event */
    /* The one don't care */
    E4_uint64 qr_efitem;        /**< elan address of the first slot */
    E4_uint64 qr_efptr;         /**< elan-side front pointer */
    E4_uint64 qr_elitem;        /**< elan address of the last slot */
    void *qr_base;              /**< main-memory base of the slot ring */
    void *qr_fptr;              /**< main-memory front pointer */
    void *qr_top;               /**< main-memory address of the last slot */
    E4_CmdQ *qr_cmdq;           /**< command queue for wait-event commands */
    ELAN_SLEEP *qr_es;          /**< sleep descriptor for blocking waits */
    RAIL *qr_rail;              /**< owning rail */
};
typedef struct ompi_ptl_elan_recv_queue_t ompi_ptl_elan_recv_queue_t;
/**
 * SDRAM-resident companion of a send descriptor: a data slot plus the
 * Elan completion event that fires when the queue DMA finishes.
 */
typedef struct
{
    /* SHOULD BE 128-byte aligned */
    uint8_t data[INPUT_QUEUE_MAX];      /* queue req data packet */
    /* SHOULD be 32-byte aligned */
    E4_Event32 event32;         /* Local elan completion event */
} ompi_elan_event_t;
/**
 * Main-memory send descriptor for the QDMA path: the DMA command block,
 * the pollable done word, a pointer to the matching SDRAM event and the
 * staging buffer.  Allocated in slabs by ompi_init_elan_queue_events().
 */
struct ompi_ptl_elan_queue_send_t
{
    E4_DMA64 main_dma;          /**< Must be 8-byte aligned */
    /* 8 byte aligned */
    volatile E4_uint64 main_doneWord;   /**< main memory location to poll */
    ompi_elan_event_t *elan_data_event; /**< 128-byte aligned copy event */
    RAIL *rail;                 /**< owning rail */
    /* 8 byte aligned */
    uint8_t buff[INPUT_QUEUE_MAX];      /**< queue data */
};
typedef struct ompi_ptl_elan_queue_send_t ompi_ptl_elan_queue_send_t;
/**
 * Control block tying together the transmit and receive halves of one
 * rail's QDMA queue (set up by ompi_init_elan_qdma()).
 */
struct ompi_ptl_elan_queue_ctrl_t
{
    /* Transmit Queues */
    /** < elan located INPUT_QUEUE_ALIGN'ed with INPUT_QUEUE_SIZE */
    E4_InputQueue *input;

    /** <transmit queue structures */
    void *tx_q;
    E4_CmdQ *tx_cmdq;           /**< command queue that launches DMAs */
    ELAN4_COOKIEPOOL *tx_cpool; /**< cookie pool keyed by elan vpid */
    ompi_event_t *tx_events;
    ompi_list_t tx_desc;        /**< list of all send descriptors */
    ompi_free_list_t tx_desc_free;      /**< free send descriptors */

    /* User progression */
    ompi_mutex_t rx_lock;       /**< protects receive-side progression */
    int rx_buffsize;            /**< usable payload bytes per slot */
    int rx_slotsize;            /**< SLOT_ALIGN'ed slot stride */
    int rx_nslots;              /**< number of usable receive slots */

    /*Automatic progression */
    void (*rx_fn)(void);        /**< callback for automatic progression */
    void *rx_handle;

    /* Recv Queue has to be well-aligned */
    ompi_ptl_elan_recv_queue_t *rxq;
};
typedef struct ompi_ptl_elan_queue_ctrl_t ompi_ptl_elan_queue_ctrl_t;
struct mca_ptl_elan_state_t {
@ -51,6 +133,7 @@ struct mca_ptl_elan_state_t {
FILE *elan_debugfile; /* Debug output file handle */
int elan_signalnum;
long elan_waittype; /**< how to wait for events */
size_t main_size; /**< size of Main memory allocator heap */
size_t elan_size; /**< size of Elan memory allocator heap */
void *main_base; /**< Main memory allocator heap base */
@ -82,15 +165,26 @@ struct mca_ptl_elan_state_t {
void *elan_estate; /**< Elan state of the 0th rail */
ELAN_RAIL **elan_rail; /**< pointers to Rail control struct for all rails */
RAIL **all_rails; /**< all rails */
int *rail_intcookie; /**< record the cookies for the rail */
ADDR_SDRAM *all_estates;
mca_ptl_elan_module_1_0_0_t *elan_module;
};
typedef struct mca_ptl_elan_state_t mca_ptl_elan_state_t;
/* Util functions, consider moving into a file ptl_elan_util.h */

/* Allocate an ELAN_SLEEP (IRQ cookie + command queues) for blocking
 * waits on the given rail; returns NULL on allocation failure. */
ELAN_SLEEP *
ompi_init_elan_sleepdesc(mca_ptl_elan_state_t * ems, RAIL *rail);

/* Initialization and finalization routines */
int ompi_mca_ptl_elan_init( mca_ptl_elan_module_1_0_0_t * emp);
int ompi_mca_ptl_elan_finalize (mca_ptl_elan_module_1_0_0_t * emp);

/* communication initialization prototypes (qdma is the only one
 * implemented so far; sten/rdma/stat are stubs) */
int ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails);
int ompi_init_elan_sten (mca_ptl_elan_module_1_0_0_t* emp, int num_rails);
int ompi_init_elan_rdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails);
int ompi_init_elan_stat (mca_ptl_elan_module_1_0_0_t* emp, int num_rails);
/* communication prototypes */
/* control, synchronization and state prototypes */