diff --git a/ompi/mca/btl/elan/btl_elan.c b/ompi/mca/btl/elan/btl_elan.c index f0984c27b8..b1252645e0 100644 --- a/ompi/mca/btl/elan/btl_elan.c +++ b/ompi/mca/btl/elan/btl_elan.c @@ -70,20 +70,21 @@ mca_btl_elan_module_t mca_btl_elan_module = { extern char** environ; int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t **ompi_procs, - struct mca_btl_base_endpoint_t** peers, - ompi_bitmap_t* reachable ) + size_t nprocs, + struct ompi_proc_t **ompi_procs, + struct mca_btl_base_endpoint_t** peers, + ompi_bitmap_t* reachable ) { mca_btl_elan_module_t* elan_btl = (mca_btl_elan_module_t*)btl; int i, rc; FILE* file; char* filename; - ELAN_BASE * base; - ELAN_STATE * state; - ELAN_QUEUE * q= NULL; - ELAN_TPORT * p= NULL; + ELAN_BASE *base; + ELAN_STATE *state; + ELAN_QUEUE *q = NULL; + ELAN_TPORT *p = NULL; + /* Create the mapid file in the temporary storage */ filename = opal_os_path( false, orte_process_info.proc_session_dir, "ELAN_ID", NULL ); file = fopen( filename, "w" ); for( i = 0; i < (int)nprocs; i++ ) { @@ -91,22 +92,26 @@ int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl, fprintf( file, "%s %d\n", ompi_proc->proc_hostname, i ); } fclose( file ); + + /* Set the environment before firing up the Elan library */ opal_setenv( "LIBELAN_MACHINES_FILE", filename, true, &environ ); - /* opal_setenv( "LIBELAN_MACHINES_FILE", "/home/tma/machinefile", false, &environ );*/ - opal_setenv( "MPIRUN_ELANIDMAP_FILE", "/etc/elanidmap", false, &environ ); + opal_setenv( "MPIRUN_ELANIDMAP_FILE", mca_btl_elan_component.elanidmap_file, + false, &environ ); + base = elan_baseInit(0); - if (base == NULL) + if( NULL == base ) return OMPI_ERR_OUT_OF_RESOURCE; state = base->state; if( NULL == state ) { mca_btl_base_error_no_nics( "ELAN", "Quadrics" ); return OMPI_ERR_OUT_OF_RESOURCE; } - elan_gsync(base->allGroup); - if ((q = elan_allocQueue(base->state)) == NULL) { + + /* Create the global queue (it's a synchronization point) */ + if( (q = elan_gallocQueue(base, base->allGroup)) == NULL ) { return OMPI_ERR_OUT_OF_RESOURCE; } - if (!(p = elan_tportInit(base->state, + if( !(p = elan_tportInit(base->state, (ELAN_QUEUE *)q, base->tport_nslots, base->tport_smallmsg, @@ -117,7 +122,7 @@ int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl, &base->shm_key, base->shm_fifodepth, base->shm_fragsize, - 0))) { + ELAN_TPORT_SHM_DISABLE | ELAN_TPORT_USERCOPY_DISABLE))) { return OMPI_ERR_OUT_OF_RESOURCE; } elan_btl->base = base; @@ -126,29 +131,35 @@ int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl, elan_btl->tport = p; elan_btl->elan_vp = state->vp; elan_btl->elan_nvp = state->nvp; + for(i = 0; i < (int) nprocs; i++) { struct ompi_proc_t* ompi_proc = ompi_procs[i]; mca_btl_elan_proc_t* elan_proc; mca_btl_base_endpoint_t* elan_endpoint; + + /* Don't use Elan for local communications */ + if( ompi_proc_local_proc == ompi_proc ) + continue; + if(NULL == (elan_proc = mca_btl_elan_proc_create(ompi_proc))) { return OMPI_ERR_OUT_OF_RESOURCE; } - OPAL_THREAD_LOCK(&elan_proc->proc_lock); elan_endpoint = OBJ_NEW(mca_btl_elan_endpoint_t); if(NULL == elan_endpoint) { - OPAL_THREAD_UNLOCK(&elan_proc->proc_lock); return OMPI_ERR_OUT_OF_RESOURCE; } elan_endpoint->endpoint_btl = elan_btl; + + OPAL_THREAD_LOCK(&elan_proc->proc_lock); rc = mca_btl_elan_proc_insert(elan_proc, elan_endpoint); - if(rc != OMPI_SUCCESS) { + OPAL_THREAD_UNLOCK(&elan_proc->proc_lock); + + if( OMPI_SUCCESS != rc ) { OBJ_RELEASE(elan_endpoint); OBJ_RELEASE(elan_proc); - OPAL_THREAD_UNLOCK(&elan_proc->proc_lock); continue; } ompi_bitmap_set_bit(reachable, i); - OPAL_THREAD_UNLOCK(&elan_proc->proc_lock); peers[i] = elan_endpoint; } return OMPI_SUCCESS; @@ -168,9 +179,9 @@ int mca_btl_elan_del_procs( struct mca_btl_base_module_t* btl, */ int mca_btl_elan_register( struct mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_module_recv_cb_fn_t cbfunc, - void* cbdata ) + mca_btl_base_tag_t tag, + mca_btl_base_module_recv_cb_fn_t cbfunc, + void* cbdata ) { mca_btl_elan_module_t* elan_btl = (mca_btl_elan_module_t*) btl; void * tbuf = NULL; @@ -180,24 +191,24 @@ int mca_btl_elan_register( struct mca_btl_base_module_t* btl, elan_btl->elan_reg[tag].cbfunc = cbfunc; elan_btl->elan_reg[tag].cbdata = cbdata; if (NULL != cbfunc) { - /* Post the receives if there is no unexpected handler */ - MCA_BTL_TEMPLATE_FRAG_ALLOC_EAGER(frag, rc ); - if( NULL == frag ) { - return OMPI_ERROR; - } - frag->base.des_dst = &(frag->segment); - frag->base.des_dst_cnt = 1; - frag->base.des_src = NULL; - frag->base.des_src_cnt = 0; - frag->tag = tag; - frag->type = MCA_BTL_ELAN_HDR_TYPE_RECV; - tbuf = (void*)(frag+1); - send_len = elan_btl->super.btl_eager_limit; - desc = (bufdesc_t * )malloc (sizeof(struct bufdesc_t)); - desc->eve = elan_tportRxStart (elan_btl->tport,0 , 0, 0, 0xffffffff, frag->tag, tbuf,send_len) ; - desc->frag = frag; - desc->next = NULL; - BTL_ELAN_ADD_TO_FIFO(elan_btl, desc); + /* Post the receives if there is no unexpected handler */ + MCA_BTL_TEMPLATE_FRAG_ALLOC_EAGER(frag, rc ); + if( NULL == frag ) { + return OMPI_ERROR; + } + frag->base.des_dst = &(frag->segment); + frag->base.des_dst_cnt = 1; + frag->base.des_src = NULL; + frag->base.des_src_cnt = 0; + frag->tag = tag; + frag->type = MCA_BTL_ELAN_HDR_TYPE_RECV; + tbuf = (void*)(frag+1); + send_len = elan_btl->super.btl_eager_limit; + desc = (bufdesc_t * )malloc (sizeof(struct bufdesc_t)); + desc->eve = elan_tportRxStart (elan_btl->tport,0 , 0, 0, 0xffffffff, frag->tag, tbuf,send_len) ; + desc->frag = frag; + desc->next = NULL; + BTL_ELAN_ADD_TO_FIFO(elan_btl, desc); } return OMPI_SUCCESS; } @@ -216,6 +227,7 @@ mca_btl_base_descriptor_t* mca_btl_elan_alloc(struct mca_btl_base_module_t* btl, { mca_btl_elan_frag_t* frag; int rc; + if(size <= btl->btl_eager_limit){ MCA_BTL_TEMPLATE_FRAG_ALLOC_EAGER(frag, rc); if( OPAL_UNLIKELY(NULL == frag) ) { @@ -416,10 +428,11 @@ int mca_btl_elan_send( struct mca_btl_base_module_t* btl, frag->type = MCA_BTL_ELAN_HDR_TYPE_SEND; peer = endpoint->elan_vp; proc = elan_btl->elan_vp; - sbuf = (void *)frag->base.des_src->seg_addr.pval; - send_len = frag->base.des_src->seg_len; + sbuf = (void *)frag->base.des_src->seg_addr.pval; + send_len = frag->base.des_src->seg_len; desc = (bufdesc_t * )malloc (sizeof(struct bufdesc_t)); - desc->eve = elan_tportTxStart (elan_btl->tport, 0, peer, proc,frag->tag, sbuf, send_len) ; + desc->eve = elan_tportTxStart (elan_btl->tport, 0, peer, proc,frag->tag, + sbuf, send_len) ; /*opal_output( 0, "send message startoing from %d to %d\n", proc, peer );*/ desc->frag = frag; desc->next = NULL; @@ -503,15 +516,13 @@ int mca_btl_elan_get( mca_btl_base_module_t* btl, void cancel_elanRx(mca_btl_elan_module_t* elan_btl) { bufdesc_t * index = elan_btl->tportFIFOHead; - while(index!= NULL) - { - if(index->frag->type == MCA_BTL_ELAN_HDR_TYPE_RECV) - { - - if(elan_tportRxCancel(index->eve)) - MCA_BTL_TEMPLATE_FRAG_RETURN(index->frag); + while( NULL != index ) { + if( index->frag->type == MCA_BTL_ELAN_HDR_TYPE_RECV ) { + if( elan_tportRxCancel(index->eve) ) { + MCA_BTL_TEMPLATE_FRAG_RETURN(index->frag); } + } index = index->next; } } @@ -519,8 +530,13 @@ void cancel_elanRx(mca_btl_elan_module_t* elan_btl) int mca_btl_elan_finalize( struct mca_btl_base_module_t* btl ) { mca_btl_elan_module_t* elan_btl = (mca_btl_elan_module_t*) btl; + OBJ_DESTRUCT(&elan_btl->elan_lock); + cancel_elanRx(elan_btl); + /* disable the network */ + elan_disable_network( elan_btl->state ); + free(elan_btl); return OMPI_SUCCESS; } diff --git a/ompi/mca/btl/elan/btl_elan.h b/ompi/mca/btl/elan/btl_elan.h index 57cf1a5606..ff0ff5a8ec 100644 --- a/ompi/mca/btl/elan/btl_elan.h +++ b/ompi/mca/btl/elan/btl_elan.h @@ -90,6 +90,8 @@ struct mca_btl_elan_component_t { char* elan_mpool_name; /**< name of memory pool */ + char* elanidmap_file; /**< name of the ELANIDMAP file */ + bool leave_pinned; /**< pin memory on first use and leave pinned */ diff --git a/ompi/mca/btl/elan/btl_elan_component.c b/ompi/mca/btl/elan/btl_elan_component.c index f810c48630..0b4cab82cb 100644 --- a/ompi/mca/btl/elan/btl_elan_component.c +++ b/ompi/mca/btl/elan/btl_elan_component.c @@ -98,8 +98,8 @@ static inline int mca_btl_elan_param_register_int( int mca_btl_elan_component_open(void) { /* initialize state */ - mca_btl_elan_component.elan_num_btls=0; - mca_btl_elan_component.elan_btls=NULL; + mca_btl_elan_component.elan_num_btls = 0; + mca_btl_elan_component.elan_btls = NULL; /* register Elan4 component parameters */ mca_btl_elan_component.elan_free_list_num = @@ -123,6 +123,9 @@ int mca_btl_elan_component_open(void) mca_btl_elan_module.super.btl_latency = 5; mca_btl_base_param_register(&mca_btl_elan_component.super.btl_version, &mca_btl_elan_module.super); + + mca_btl_elan_component.elanidmap_file = + mca_btl_elan_param_register_string( "elanidmap", "/etc/elanidmap" ); return OMPI_SUCCESS; } @@ -135,8 +138,10 @@ int mca_btl_elan_component_close(void) { if( NULL != mca_btl_elan_component.elan_btls ) { free( mca_btl_elan_component.elan_btls ); - /* release resources */ + mca_btl_elan_component.elan_btls = NULL; + mca_btl_elan_component.elan_num_btls = 0; + /* release resources */ OBJ_DESTRUCT(&mca_btl_elan_component.elan_procs); OBJ_DESTRUCT(&mca_btl_elan_component.elan_frag_eager); OBJ_DESTRUCT(&mca_btl_elan_component.elan_frag_user); @@ -196,20 +201,21 @@ mca_btl_base_module_t** mca_btl_elan_component_init( int *num_btl_modules, NULL ); /* use default allocator */ - opal_setenv( "MPIRUN_ELANIDMAP_FILE", "/etc/elanidmap", false, &environ ); vpid = orte_process_info.my_name->vpid; - ompi_modex_send( &mca_btl_elan_component.super.btl_version, &vpid, sizeof(vpid)); + ompi_modex_send( &mca_btl_elan_component.super.btl_version, &vpid, + sizeof(vpid)); + mca_btl_elan_component.elan_num_btls = 1; - mca_btl_elan_component.elan_btls = malloc( (mca_btl_elan_component.elan_num_btls) * sizeof(mca_btl_base_module_t*) ); + mca_btl_elan_component.elan_btls = malloc( mca_btl_elan_component.elan_num_btls * sizeof(mca_btl_base_module_t*) ); for( i = count = 0; i < mca_btl_elan_component.elan_num_btls; i++ ) { mca_btl_elan_module_t* btl = malloc (sizeof (mca_btl_elan_module_t)); if(NULL == btl) continue; memcpy( btl, &mca_btl_elan_module, sizeof(mca_btl_elan_module_t) ); OBJ_CONSTRUCT (&btl->elan_lock, opal_mutex_t); - btl->tportFIFOHead=NULL; - btl->tportFIFOTail=NULL; + btl->tportFIFOHead = NULL; + btl->tportFIFOTail = NULL; mca_btl_elan_component.elan_btls[count++] = btl; } mca_btl_elan_component.elan_num_btls = count ; @@ -219,7 +225,8 @@ mca_btl_base_module_t** mca_btl_elan_component_init( int *num_btl_modules, mca_btl_elan_component.elan_num_btls = 0; /* no active BTL modules */ return NULL; } - memcpy( btls, mca_btl_elan_component.elan_btls, mca_btl_elan_component.elan_num_btls *sizeof(mca_btl_elan_module_t*) ); + memcpy( btls, mca_btl_elan_component.elan_btls, + mca_btl_elan_component.elan_num_btls *sizeof(mca_btl_elan_module_t*) ); *num_btl_modules = mca_btl_elan_component.elan_num_btls; return btls; } @@ -227,11 +234,7 @@ mca_btl_base_module_t** mca_btl_elan_component_init( int *num_btl_modules, /* * Elan4 component progress. */ - - - - -int mca_btl_elan_component_progress() +int mca_btl_elan_component_progress( void ) { size_t num_progressed = 0, i, no_btls, size; mca_btl_elan_frag_t* frag; @@ -245,59 +248,51 @@ int mca_btl_elan_component_progress() if(desc ==NULL) continue; frag = (mca_btl_elan_frag_t*) desc->frag; - if(frag!=NULL) - { - if(frag->type== MCA_BTL_ELAN_HDR_TYPE_SEND ) - { - /* it's a send */ - /* call the completion callback */ - elan_tportTxWait(desc->eve); - frag->base.des_cbfunc( &(elan_btl->super), frag->endpoint, &(frag->base), OMPI_SUCCESS ); - free(desc); - - } - else if(frag->type== MCA_BTL_ELAN_HDR_TYPE_PUT || frag->type== MCA_BTL_ELAN_HDR_TYPE_GET ) - { - /* it's a put*/ - /* call the completion callback */ - elan_wait(desc->eve,ELAN_WAIT_EVENT); - frag->base.des_cbfunc( &(elan_btl->super), frag->endpoint, &(frag->base), OMPI_SUCCESS ); - free(desc); - } - else{ - /* and this one is a receive */ - mca_btl_base_recv_reg_t* reg; - reg = &(elan_btl->elan_reg[frag->tag]); - elan_tportRxWait(desc->eve, NULL, NULL, &size); - frag->base.des_dst->seg_len = size; - reg->cbfunc( &(elan_btl->super), frag->tag, &(frag->base),reg->cbdata ); - /** - * The upper level extract the data from the fragment. - * Now we can register the fragment - * again with the elan BTL. - */ - desc->eve = elan_tportRxStart (elan_btl->tport, 0 , 0, 0, 0xffffffff, frag->tag, frag->base.des_dst->seg_addr.pval, mca_btl_elan_module.super.btl_eager_limit) ; - /*desc->eve = elan_tportRxStart (elan_btl->tport, ELAN_TPORT_RXANY , 0, 0, 0, 0, frag->base.des_dst->seg_addr.pval, mca_btl_elan_module.super.btl_eager_limit) ;*/ - - desc->frag = frag; - desc->next = NULL; - OPAL_THREAD_LOCK(&elan_btl->elan_lock); - if(elan_btl->tportFIFOTail) - { - elan_btl->tportFIFOTail->next = desc; - elan_btl->tportFIFOTail=desc; - } - else{ - elan_btl->tportFIFOHead = desc; - elan_btl->tportFIFOTail = desc; - } - OPAL_THREAD_UNLOCK(&elan_btl->elan_lock); - } + if( NULL != frag ) { + if(frag->type== MCA_BTL_ELAN_HDR_TYPE_SEND ) { + /* it's a send */ + /* call the completion callback */ + elan_tportTxWait(desc->eve); + frag->base.des_cbfunc( &(elan_btl->super), frag->endpoint, &(frag->base), OMPI_SUCCESS ); + free(desc); + } else if( (frag->type == MCA_BTL_ELAN_HDR_TYPE_PUT) || + (frag->type== MCA_BTL_ELAN_HDR_TYPE_GET) ) { + /* it's a put*/ + /* call the completion callback */ + elan_wait(desc->eve,ELAN_WAIT_EVENT); + frag->base.des_cbfunc( &(elan_btl->super), frag->endpoint, &(frag->base), OMPI_SUCCESS ); + free(desc); } else { - opal_output( 0, "Something bad happened the frag == NULL\n" ); + /* and this one is a receive */ + mca_btl_base_recv_reg_t* reg; + reg = &(elan_btl->elan_reg[frag->tag]); + elan_tportRxWait(desc->eve, NULL, NULL, &size); + frag->base.des_dst->seg_len = size; + reg->cbfunc( &(elan_btl->super), frag->tag, &(frag->base),reg->cbdata ); + /** + * The upper level extract the data from the fragment. + * Now we can register the fragment + * again with the elan BTL. + */ + desc->eve = elan_tportRxStart (elan_btl->tport, 0 , 0, 0, 0xffffffff, frag->tag, frag->base.des_dst->seg_addr.pval, mca_btl_elan_module.super.btl_eager_limit) ; + /*desc->eve = elan_tportRxStart (elan_btl->tport, ELAN_TPORT_RXANY , 0, 0, 0, 0, frag->base.des_dst->seg_addr.pval, mca_btl_elan_module.super.btl_eager_limit) ;*/ + + desc->frag = frag; + desc->next = NULL; + OPAL_THREAD_LOCK(&elan_btl->elan_lock); + if( elan_btl->tportFIFOTail ) { + elan_btl->tportFIFOTail->next = desc; + elan_btl->tportFIFOTail=desc; + } else { + elan_btl->tportFIFOHead = desc; + elan_btl->tportFIFOTail = desc; + } + OPAL_THREAD_UNLOCK(&elan_btl->elan_lock); } + } else { + opal_output( 0, "Something bad happened the frag == NULL\n" ); + } num_progressed++; - } return num_progressed;