1
1

commit in more changes for send frags

This commit was SVN r2194.
Этот коммит содержится в:
Weikuan Yu 2004-08-18 05:05:49 +00:00
родитель cd68dfa757
Коммит 351ea78525
7 изменённых файлов: 58 добавлений и 43 удалений

Просмотреть файл

@ -155,8 +155,8 @@ mca_ptl_elan_finalize (struct mca_ptl_base_module_t *ptl)
free (elan_ptl);
/* Record that this entry is now missing */
mca_ptl_elan_component.elan_ptl_modules[rail_index] = NULL;
mca_ptl_elan_component.elan_num_ptl_modules--;
mca_ptl_elan_component.modules[rail_index] = NULL;
mca_ptl_elan_component.num_modules--;
return OMPI_SUCCESS;
}
@ -383,7 +383,7 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl,
__FILE__, __LINE__);
OMPI_THREAD_LOCK(&mca_ptl_elan_component.elan_lock);
recv_frag->frag_ack_pending = true;
ompi_list_append(&mca_ptl_elan_component.elan_pending_acks,
ompi_list_append(&((mca_ptl_elan_module_t * )ptl)->pending_acks,
(ompi_list_item_t*)frag);
OMPI_THREAD_UNLOCK(&mca_ptl_elan_component.elan_lock);
} else {

Просмотреть файл

@ -43,6 +43,7 @@ struct mca_ptl_elan_module_t {
unsigned int elan_nvp; /**< total # of elan vpid */
ompi_list_t send_frags; /**< outstanding send/put/get */
ompi_list_t recv_frags; /**< outstanding recv's */
ompi_list_t pending_acks;
struct ompi_ptl_elan_queue_ctrl_t *queue; /**< Queue ctrl struct*/
struct ompi_ptl_elan_putget_ctrl_t *putget; /**< putget ctrl struct */
@ -57,6 +58,9 @@ struct mca_ptl_elan_component_t {
mca_ptl_base_component_t super; /**< base PTL component */
size_t num_modules; /**< number of ptls activated */
size_t free_list_num; /**< min number of list items */
size_t free_list_max; /**< max number of list items*/
size_t free_list_inc; /**< inc for each grow */
/* We create our own simplified structure for managing elan state
* although libelan already provides one. We do not need

Просмотреть файл

@ -48,10 +48,11 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
OBJ_CONSTRUCT (&queue->tx_desc_free, ompi_free_list_t);
flist = &queue->tx_desc_free;
main_align = GET_MAX (sizeof (void *), 8);
elan_align = GET_MAX (sizeof (int *), ELAN_BLOCK_ALIGN);
main_size = ALIGNUP (sizeof (ompi_ptl_elan_qdma_desc_t), main_align);
elan_size = ALIGNUP (sizeof (E4_Event), elan_align);
main_align = OMPI_PTL_ELAN_GET_MAX (sizeof (void *), 8);
elan_align = OMPI_PTL_ELAN_GET_MAX (sizeof (int *), ELAN_BLOCK_ALIGN);
main_size = OMPI_PTL_ELAN_ALIGNUP (sizeof (ompi_ptl_elan_qdma_desc_t),
main_align);
elan_size = OMPI_PTL_ELAN_ALIGNUP (sizeof (E4_Event), elan_align);
OBJ_CONSTRUCT(&flist->fl_lock, ompi_mutex_t);
flist->fl_elem_size = flist->fl_max_to_alloc = OMPI_PTL_ELAN_MAX_QDESCS;
@ -188,12 +189,14 @@ ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl,
START_FUNC(PTL_ELAN_DEBUG_INIT);
main_align = GET_MAX (sizeof (void *), ELAN_ALIGN);
elan_align = GET_MAX (sizeof (int *), ELAN_BLOCK_ALIGN);
main_size = ALIGNUP(sizeof(ompi_ptl_elan_putget_desc_t), main_align);
main_align = OMPI_PTL_ELAN_GET_MAX (sizeof (void *), ELAN_ALIGN);
elan_align = OMPI_PTL_ELAN_GET_MAX (sizeof (int *), ELAN_BLOCK_ALIGN);
main_size = OMPI_PTL_ELAN_ALIGNUP(sizeof(ompi_ptl_elan_putget_desc_t),
main_align);
/* Contain elan_event, chain_event and a chain_buff */
elan_size = ALIGNUP((sizeof(E4_Event32)*2 + ELAN_BLOCK_SIZE), elan_align);
elan_size = OMPI_PTL_ELAN_ALIGNUP(
(sizeof(E4_Event32)*2 + ELAN_BLOCK_SIZE), elan_align);
rail = (RAIL *) ptl->ptl_elan_rail;
ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;
@ -257,7 +260,7 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
{
int i;
int nslots = OMPI_PTL_ELAN_MAX_QSLOTS;
int slotsize = OMPI_PTL_ELAN_MAX_QSIZE;;
int slotsize = OMPI_PTL_ELAN_MAX_QSIZE;
RAIL *rail;
ELAN4_CTX *ctx;
struct mca_ptl_elan_module_t *ptl;
@ -270,7 +273,7 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
ompi_ptl_elan_recv_queue_t *rxq;
ompi_ptl_elan_queue_ctrl_t *queue;
ptl = emp->elan_ptl_modules[i];
ptl = emp->modules[i];
rail = (RAIL *) ptl->ptl_elan_rail;
ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;
@ -309,11 +312,11 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
/* Init the Receive Queue structure */
queue->rx_nslots = nslots;
nslots += ELAN_QUEUE_LOST_SLOTS;
nslots += OMPI_PTL_ELAN_LOST_QSLOTS;
queue->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ?
INPUT_QUEUE_MAX : slotsize;
queue->rx_slotsize = ELAN_ALIGNUP (slotsize, SLOT_ALIGN);
queue->rx_slotsize = ELAN_ALIGNUP (slotsize, OMPI_PTL_ELAN_SLOT_ALIGN);
rxq = queue->rxq = (ompi_ptl_elan_recv_queue_t *)
elan4_allocMain (rail->r_alloc, 64,
@ -327,7 +330,8 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_fptr, NULL, OMPI_ERROR, 0);
memset (rxq->qr_fptr, 0xeb, nslots * queue->rx_slotsize);
rxq->qr_elanDone = ALLOC_ELAN (rail, SLOT_ALIGN, sizeof (EVENT32));
rxq->qr_elanDone = ALLOC_ELAN (rail,
OMPI_PTL_ELAN_SLOT_ALIGN, sizeof (EVENT32));
OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_elanDone, NULL, OMPI_ERROR, 0);
/* Set the top et al */
@ -335,10 +339,10 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
assert(rxq->qr_efitem != ELAN_BAD_ADDR);
rxq->qr_base = rxq->qr_fptr;
rxq->qr_top = (void *) ((uintptr_t) rxq->qr_base +
(queue->rx_slotsize * (nslots - ELAN_QUEUE_LOST_SLOTS)));
(queue->rx_slotsize * (nslots - OMPI_PTL_ELAN_LOST_QSLOTS)));
rxq->qr_efptr = rxq->qr_efitem;
rxq->qr_elitem = rxq->qr_efitem +
(queue->rx_slotsize * (nslots - ELAN_QUEUE_LOST_SLOTS));
(queue->rx_slotsize * (nslots - OMPI_PTL_ELAN_LOST_QSLOTS));
/* Event to wait/block on, Bug here for the event */
rxq->qr_qEvent = rxq->qr_elanDone;
@ -392,7 +396,7 @@ ompi_init_elan_putget (mca_ptl_elan_component_t * emp,
E4_CmdQParams *cqp;
ompi_ptl_elan_putget_ctrl_t *putget;
ptl = emp->elan_ptl_modules[i];
ptl = emp->modules[i];
rail = (RAIL *) ptl->ptl_elan_rail;
ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;

Просмотреть файл

@ -71,11 +71,11 @@ static int mca_ptl_elan_addr_put (mca_ptl_elan_component_t *emp)
mca_ptl_elan_addr_t *addrs;
size = emp->elan_num_ptl_modules * sizeof(mca_ptl_elan_addr_t);
size = emp->num_modules * sizeof(mca_ptl_elan_addr_t);
addrs = (mca_ptl_elan_addr_t *) malloc(size);
for(i=0; i< emp->elan_num_ptl_modules; i++) {
mca_ptl_elan_module_t * ptl = emp->elan_ptl_modules[i];
for(i=0; i< emp->num_modules; i++) {
mca_ptl_elan_module_t * ptl = emp->modules[i];
addrs[i].elan_vp = ptl->elan_vp;
addrs[i].inuse = 0;
addrs[i].gid = ompi_proc_local_proc->proc_name;
@ -101,20 +101,26 @@ mca_ptl_elan_component_open (void)
mca_ptl_elan_module.super.ptl_exclusivity =
mca_ptl_elan_param_register_int ("exclusivity", 0);
length = PTL_ELAN_INPUT_QUEUE_MAX - sizeof(mca_ptl_base_header_t);
length = OMPI_PTL_ELAN_MAX_QSIZE - sizeof(mca_ptl_base_header_t);
param1 = mca_ptl_elan_param_register_int ("first_frag_size", length);
param2 = mca_ptl_elan_param_register_int ("min_frag_size", length);
param3 = mca_ptl_elan_param_register_int ("max_frag_size", 2<<31);
/* Correct these if the user gives invalid parameters */
mca_ptl_elan_module.super.ptl_first_frag_size = GET_MIN(param1, length);
mca_ptl_elan_module.super.ptl_min_frag_size = GET_MAX(param2, length);
mca_ptl_elan_module.super.ptl_max_frag_size = GET_MIN(param3, 2<<31);
mca_ptl_elan_module.super.ptl_first_frag_size =
OMPI_PTL_ELAN_GET_MIN(param1, length);
mca_ptl_elan_module.super.ptl_min_frag_size =
OMPI_PTL_ELAN_GET_MAX(param2, length);
mca_ptl_elan_module.super.ptl_max_frag_size =
OMPI_PTL_ELAN_GET_MIN(param3, 2<<31);
/* initialize state */
elan_mp->elan_local = NULL;
elan_mp->num_modules = 0;
elan_mp->modules = NULL;
elan_mp->free_list_num = 32;
elan_mp->free_list_max = 128;
elan_mp->free_list_inc = 32;
/* initialize objects*/
OBJ_CONSTRUCT (&elan_mp->elan_procs, ompi_list_t);
@ -136,12 +142,12 @@ mca_ptl_elan_component_close (void)
free (elan_mp->elan_local);
}
if (NULL != elan_mp->elan_ptl_modules) {
if (NULL != elan_mp->modules) {
int i;
for (i = elan_mp->elan_num_ptl_modules; i > 0; i--) {
free (elan_mp->elan_ptl_modules[i - 1]);
for (i = elan_mp->num_modules; i > 0; i--) {
free (elan_mp->modules[i - 1]);
}
free (elan_mp->elan_ptl_modules);
free (elan_mp->modules);
}
}
@ -174,7 +180,7 @@ mca_ptl_elan_component_close (void)
* (3) register the list of PTL parameters with the MCA
*/
mca_ptl_base_module_t **
mca_ptl_elan_component_init (int *num_ptl_modules,
mca_ptl_elan_component_init (int *num_ptls,
bool * allow_multi_user_threads,
bool * have_hidden_threads)
{
@ -183,16 +189,17 @@ mca_ptl_elan_component_init (int *num_ptl_modules,
START_FUNC(PTL_ELAN_DEBUG_INIT);
/* TODO: support multiple threads */
*num_ptl_modules = 0;
*num_ptls = 0;
*allow_multi_user_threads = true;
*have_hidden_threads = OMPI_HAVE_THREADS;
ompi_free_list_init (&(elan_mp->elan_recv_frags_free),
sizeof (mca_ptl_elan_recv_frag_t),
OBJ_CLASS (mca_ptl_elan_recv_frag_t),
elan_mp->elan_free_list_num,
elan_mp->elan_free_list_max,
elan_mp->elan_free_list_inc, NULL);
/*32, 128, 32, */
elan_mp->free_list_num,
elan_mp->free_list_max,
elan_mp->free_list_inc, NULL);
/* open basic elan device */
if (OMPI_SUCCESS != ompi_mca_ptl_elan_init(&mca_ptl_elan_component)) {
@ -209,7 +216,7 @@ mca_ptl_elan_component_init (int *num_ptl_modules,
return NULL;
}
ptls = (mca_ptl_base_module_t **) malloc (elan_mp->elan_num_ptl_modules *
ptls = (mca_ptl_base_module_t **) malloc (elan_mp->num_modules *
sizeof (mca_ptl_elan_module_t *));
if (NULL == ptls) {
ompi_output(0,
@ -218,9 +225,9 @@ mca_ptl_elan_component_init (int *num_ptl_modules,
return NULL;
}
memcpy (ptls, elan_mp->elan_ptl_modules,
memcpy (ptls, elan_mp->modules,
elan_mp->num_modules * sizeof (mca_ptl_elan_module_t *));
*num_ptl_modules = elan_mp->elan_num_ptl_modules;
*num_ptls = elan_mp->num_modules;
mca_ptl_elan_component_initialized = true;
END_FUNC(PTL_ELAN_DEBUG_INIT);

Просмотреть файл

@ -68,6 +68,7 @@ ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
ptl->elan_nvp = ems->elan_nvp;
OBJ_CONSTRUCT (&ptl->recv_frags, ompi_list_t);
OBJ_CONSTRUCT (&ptl->send_frags, ompi_list_t);
OBJ_CONSTRUCT (&ptl->pending_acks, ompi_list_t);
emp->num_modules++;
} while (emp->num_modules < rail_count);
@ -248,6 +249,7 @@ ompi_module_elan_close_ptls (mca_ptl_elan_component_t * emp,
if (NULL == ptl) continue;
OBJ_DESTRUCT (&(ptl->recv_frags));
OBJ_DESTRUCT (&(ptl->send_frags));
OBJ_DESTRUCT (&(ptl->pending_acks));
}
}

Просмотреть файл

@ -576,7 +576,7 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
elan4_flush_cmdq_reorder (elan_ptl->queue->tx_cmdq);
/* Insert desc into the list of outstanding DMA's */
ompi_list_append (&elan_ptl->queue->tx_desc, (ompi_list_item_t *) desc);
ompi_list_append (&elan_ptl->send_frags, (ompi_list_item_t *) desc);
/* fragment state */
desc->desc->req = NULL;
@ -714,16 +714,14 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp)
rc = elan4_pollevent_word(ctx, &frag->desc->main_doneWord, 1);
#endif
if (rc) {
mca_ptl_elan_send_request_t *req;
struct ompi_ptl_elan_base_desc_t *basic;
/* Remove the desc, update the request, put back to free list */
frag = (mca_ptl_elan_send_frag_t *)
ompi_list_remove_first (&ptl->send_frags);
basic = (ompi_ptl_elan_qdma_desc_t*)frag->desc;
req = (mca_ptl_elan_send_request_t *)basic->req;
mca_ptl_elan_send_desc_done (frag, req);
mca_ptl_elan_send_desc_done (frag, basic->req);
INITEVENT_WORD (ctx, basic->elan_event, &basic->main_doneWord);
RESETEVENT_WORD (&basic->main_doneWord);
PRIMEEVENT_WORD (ctx, basic->elan_event, 1);

Просмотреть файл

@ -63,7 +63,7 @@
#define OMPI_PTL_ELAN_NUM_PUTGET (8)
#define OMPI_PTL_ELAN_MAX_PGDESC (8)
#define OMPI_PTL_ELAN_PTL_FASTPATH (0x1)
#define OMPI_PTL_ELAN_FASTPATH (0x1)
#define OMPI_PTL_ELAN_SLOT_ALIGN (128)
/*
 * Type-generic max/min helpers.
 *
 * Every use of an argument, and the comparison itself, is parenthesized so
 * that operands containing lower-precedence operators (e.g. `x & mask`,
 * `c ? a : b`) are grouped the way the caller wrote them rather than being
 * re-associated with the `>` / `<` inside the macro.
 *
 * Caveat: an argument may be evaluated more than once, so do not pass
 * expressions with side effects (e.g. `i++`).
 */
#define OMPI_PTL_ELAN_GET_MAX(a,b) (((a) > (b)) ? (a) : (b))
#define OMPI_PTL_ELAN_GET_MIN(a,b) (((a) < (b)) ? (a) : (b))