diff --git a/ompi/mca/btl/elan/.ompi_ignore b/ompi/mca/btl/elan/.ompi_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ompi/mca/btl/elan/.ompi_unignore b/ompi/mca/btl/elan/.ompi_unignore new file mode 100644 index 0000000000..7a3d8b6c04 --- /dev/null +++ b/ompi/mca/btl/elan/.ompi_unignore @@ -0,0 +1,2 @@ +bosilca +tma diff --git a/ompi/mca/btl/elan/Makefile.am b/ompi/mca/btl/elan/Makefile.am new file mode 100644 index 0000000000..6f351f56fc --- /dev/null +++ b/ompi/mca/btl/elan/Makefile.am @@ -0,0 +1,46 @@ +# +# Copyright (c) 2004-2007 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Use the top-level Makefile.options + + + +AM_CPPFLAGS = $(btl_elan_CPPFLAGS) + +btl_elan_sources = btl_elan.c btl_elan.h btl_elan_component.c btl_elan_endpoint.c \ + btl_elan_endpoint.h btl_elan_frag.c btl_elan_frag.h btl_elan_proc.c btl_elan_proc.h + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). 
+ +if OMPI_BUILD_btl_elan_DSO +component_noinst = +component_install = mca_btl_elan.la +else +component_noinst = libmca_btl_elan.la +component_install = +endif + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_btl_elan_la_SOURCES = $(btl_elan_sources) +mca_btl_elan_la_LIBADD = \ + $(btl_elan_LIBS) \ + $(top_ompi_builddir)/ompi/libmpi.la \ + $(top_ompi_builddir)/orte/libopen-rte.la \ + $(top_ompi_builddir)/opal/libopen-pal.la +mca_btl_elan_la_LDFLAGS = -module -avoid-version $(btl_elan_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_btl_elan_la_SOURCES = $(btl_elan_sources) +libmca_btl_elan_la_LIBADD = $(btl_elan_LIBS) +libmca_btl_elan_la_LDFLAGS = -module -avoid-version $(btl_elan_LDFLAGS) diff --git a/ompi/mca/btl/elan/btl_elan.c b/ompi/mca/btl/elan/btl_elan.c new file mode 100644 index 0000000000..a48184f8f5 --- /dev/null +++ b/ompi/mca/btl/elan/btl_elan.c @@ -0,0 +1,513 @@ +/* + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "opal/util/output.h" +#include "opal/util/if.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/btl/btl.h" + +#include "btl_elan.h" +#include "btl_elan_frag.h" +#include "btl_elan_proc.h" +#include "btl_elan_endpoint.h" + +#include "ompi/datatype/convertor.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/mca/mpool/mpool.h" + +#include "opal/util/os_path.h" +#include "opal/util/opal_environ.h" + +mca_btl_elan_module_t mca_btl_elan_module = { + { + &mca_btl_elan_component.super, + 0, /* max size of first fragment */ + 0, /* min send fragment size */ + 0, /* max send fragment size */ + 0, /* btl_rdma_pipeline_offset */ + 0, /* btl_rdma_pipeline_frag_size */ + 0, /* btl_min_rdma_pipeline_size */ + 0, /* exclusivity */ + 0, /* latency */ + 0, /* bandwidth */ + 0, /* flags */ + mca_btl_elan_add_procs, + mca_btl_elan_del_procs, + mca_btl_elan_register, + mca_btl_elan_finalize, + mca_btl_elan_alloc, + mca_btl_elan_free, + mca_btl_elan_prepare_src, + mca_btl_elan_prepare_dst, + mca_btl_elan_send, + mca_btl_elan_put, + NULL, /*mca_btl_elan_get,*/ + NULL, /*mca_btl_elan_dump,*/ + NULL, /* mpool */ + NULL, /* register error cb */ + mca_btl_elan_ft_event /* mca_btl_elan_ft_event*/ + } +}; + +/** + * + */ +extern char** environ; + +int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **ompi_procs, + struct mca_btl_base_endpoint_t** peers, + ompi_bitmap_t* reachable ) +{ + mca_btl_elan_module_t* elan_btl = (mca_btl_elan_module_t*)btl; + int i, rc; + for(i = 0; i < (int) nprocs; i++) { + struct ompi_proc_t* ompi_proc = ompi_procs[i]; + mca_btl_elan_proc_t* elan_proc; + mca_btl_base_endpoint_t* elan_endpoint; + + if(NULL == (elan_proc = mca_btl_elan_proc_create(ompi_proc))) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + OPAL_THREAD_LOCK(&elan_proc->proc_lock); + elan_endpoint = OBJ_NEW(mca_btl_elan_endpoint_t); 
+ if(NULL == elan_endpoint) { + OPAL_THREAD_UNLOCK(&elan_proc->proc_lock); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + elan_endpoint->endpoint_btl = elan_btl; + rc = mca_btl_elan_proc_insert(elan_proc, elan_endpoint); + if(rc != OMPI_SUCCESS) { + OBJ_RELEASE(elan_endpoint); + OBJ_RELEASE(elan_proc); + OPAL_THREAD_UNLOCK(&elan_proc->proc_lock); + continue; + } + + ompi_bitmap_set_bit(reachable, i); + OPAL_THREAD_UNLOCK(&elan_proc->proc_lock); + peers[i] = elan_endpoint; + + } + + return OMPI_SUCCESS; +} + +int mca_btl_elan_del_procs( struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **procs, + struct mca_btl_base_endpoint_t ** endpoints ) +{ + return OMPI_SUCCESS; +} + + +/** + * Register callback function to support send/recv semantics + */ + +int mca_btl_elan_register( struct mca_btl_base_module_t* btl, + mca_btl_base_tag_t tag, + mca_btl_base_module_recv_cb_fn_t cbfunc, + void* cbdata ) +{ + mca_btl_elan_module_t* elan_btl = (mca_btl_elan_module_t*) btl; + void * tbuf = NULL; + int rc; + bufdesc_t *desc; + mca_btl_elan_frag_t* frag; + + elan_btl->elan_reg[tag].cbfunc = cbfunc; + elan_btl->elan_reg[tag].cbdata = cbdata; + if (NULL != cbfunc) { + /* Post the receives if there is no unexpected handler */ + MCA_BTL_TEMPLATE_FRAG_ALLOC_EAGER(frag, rc ); + if( NULL == frag ) { + return OMPI_ERROR; + } + frag->base.des_dst = &(frag->segment); + frag->base.des_dst_cnt = 1; + frag->base.des_src = NULL; + frag->base.des_src_cnt = 0; + frag->tag = tag; + frag->type = MCA_BTL_ELAN_HDR_TYPE_RECV; + tbuf = (void*)(frag+1); + desc = (bufdesc_t * )malloc (sizeof(struct bufdesc_t)); + desc->eve = elan_tportRxStart( elan_btl->tport, 0, 0, 0, BTL_ELAN_RECV_MASK, + frag->tag, tbuf, elan_btl->super.btl_eager_limit ); + desc->frag = frag; + desc->next = NULL; + + BTL_ELAN_ADD_TO_FIFO( elan_btl, desc ); + } + return OMPI_SUCCESS; +} + + +/** + * Allocate a segment. + * + * @param btl (IN) BTL module + * @param size (IN) Request segment size. 
+ */ + +mca_btl_base_descriptor_t* mca_btl_elan_alloc(struct mca_btl_base_module_t* btl, + uint8_t order, + size_t size ) +{ + + mca_btl_elan_frag_t* frag; + int rc; + + + if(size <= btl->btl_eager_limit){ + MCA_BTL_TEMPLATE_FRAG_ALLOC_EAGER(frag, rc); + if( OPAL_UNLIKELY(NULL == frag) ) { + return NULL; + } + + frag->segment.seg_len = size; + } else if (size <= btl->btl_max_send_size){ + MCA_BTL_TEMPLATE_FRAG_ALLOC_MAX(frag, rc); + if( OPAL_UNLIKELY(NULL == frag) ) { + return NULL; + } + + frag->segment.seg_len = size; + } else { + return NULL; + } + frag->segment.seg_addr.pval = (void*)(frag+1); + frag->base.des_src = &(frag->segment); + frag->base.des_src_cnt = 1; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; + frag->base.des_flags = 0; + frag->btl = (mca_btl_elan_module_t*)btl; + frag->base.order = MCA_BTL_NO_ORDER; + return (mca_btl_base_descriptor_t*)frag; + +} + + +/** + * Return a segment + */ + +int mca_btl_elan_free( struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des ) +{ + mca_btl_elan_frag_t* frag = (mca_btl_elan_frag_t*)des; + MCA_BTL_TEMPLATE_FRAG_RETURN(frag); + return OMPI_SUCCESS; +} + +/** + * Pack data and return a descriptor that can be + * used for send/put. 
+ * + * @param btl (IN) BTL module + * @param peer (IN) BTL peer addressing + */ +mca_btl_base_descriptor_t* mca_btl_elan_prepare_src( struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size ) + + +{ + mca_btl_elan_frag_t* frag; + struct iovec iov; + uint32_t iov_count = 1; + size_t max_data = *size; + int rc; + if( OPAL_UNLIKELY(max_data > UINT32_MAX) ) { + max_data = (size_t)UINT32_MAX; + } + if (max_data+reserve <= btl->btl_eager_limit) { + MCA_BTL_TEMPLATE_FRAG_ALLOC_EAGER(frag, rc); + if(NULL == frag) { + return NULL; + } + iov.iov_len = max_data; + iov.iov_base = (void*)((unsigned char*) frag->segment.seg_addr.pval + reserve); + rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data ); + *size = max_data; + if( rc < 0 ) { + MCA_BTL_TEMPLATE_FRAG_RETURN(frag); + return NULL; + } + frag->segment.seg_len = max_data + reserve; + } + + + else { + + MCA_BTL_TEMPLATE_FRAG_ALLOC_MAX(frag, rc); + if(NULL == frag) { + return NULL; + } + + if(max_data + reserve > btl->btl_max_send_size){ + max_data = btl->btl_max_send_size - reserve; + } + iov.iov_len = max_data; + iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve; + + rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data ); + if( rc < 0 ) { + MCA_BTL_TEMPLATE_FRAG_RETURN(frag); + return NULL; + } + *size = max_data; + } + frag->segment.seg_len = max_data + reserve; + frag->base.des_src = &(frag->segment); + frag->base.des_src_cnt = 1; + frag->base.order = MCA_BTL_NO_ORDER; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; + frag->base.des_flags = 0; + return &frag->base; +} + +/** + * Prepare a descriptor for send/rdma using the supplied + * convertor. If the convertor references data that is contigous, + * the descriptor may simply point to the user buffer. 
Otherwise, + * this routine is responsible for allocating buffer space and + * packing if required. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL peer addressing + * @param convertor (IN) Data type convertor + * @param reserve (IN) Additional bytes requested by upper layer to precede user data + * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) + */ + +mca_btl_base_descriptor_t* mca_btl_elan_prepare_dst( struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size ) +{ + + mca_btl_elan_frag_t* frag; + int rc; + if( OPAL_UNLIKELY((*size) > UINT32_MAX) ) { + *size = (size_t)UINT32_MAX; + } + + MCA_BTL_TEMPLATE_FRAG_ALLOC_USER(frag, rc); + if(NULL == frag) { + return NULL; + } + + frag->segment.seg_len = *size; + ompi_convertor_get_current_pointer( convertor, (void**)&(frag->segment.seg_addr.pval) ); + /*frag->segment.seg_addr.pval = convertor->pBaseBuf + convertor->bConverted;*/ + frag->type = MCA_BTL_ELAN_HDR_TYPE_PUT; + frag->base.des_src = NULL; + frag->base.des_src_cnt = 0; + frag->base.des_flags = 0; + + frag->base.des_dst = &(frag->segment); + frag->base.des_dst_cnt = 1; + + frag->base.order = MCA_BTL_NO_ORDER; + return &frag->base; + +} + + +/** + * Initiate an asynchronous send. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transfered + * @param tag (IN) The tag value used to notify the peer. 
+ */ + +int mca_btl_elan_send( struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_btl_base_descriptor_t* descriptor, + mca_btl_base_tag_t tag ) + +{ + mca_btl_elan_module_t* elan_btl = (mca_btl_elan_module_t*) btl; + mca_btl_elan_frag_t* frag = (mca_btl_elan_frag_t*)descriptor; + int peer, proc, send_len; + void *sbuf = NULL; + bufdesc_t * desc; + + /* TODO */ + /*opal_output(0, "I am send,tag is %d\n", tag);*/ + frag->btl = elan_btl; + frag->endpoint = endpoint; + frag->tag = tag; + frag->type = MCA_BTL_ELAN_HDR_TYPE_SEND; + + peer = endpoint->elan_vp; + proc = elan_btl->elan_vp; + + sbuf = (void *)frag->base.des_src->seg_addr.pval; + send_len = frag->base.des_src->seg_len; + desc = (bufdesc_t * )malloc (sizeof(struct bufdesc_t)); + desc->eve = elan_tportTxStart (elan_btl->tport, 0, peer, proc,frag->tag, sbuf, send_len) ; + /*opal_output( 0, "send message startoing from %p with ;length %d\n", sbuf, send_len );*/ + + desc->frag = frag; + desc->next = NULL; + BTL_ELAN_ADD_TO_FIFO( elan_btl, desc ); + return OMPI_SUCCESS; +} + + +/** + * Initiate an asynchronous put. 
+ * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + */ + +int mca_btl_elan_put( mca_btl_base_module_t* btl, + mca_btl_base_endpoint_t* endpoint, + mca_btl_base_descriptor_t* des ) +{ + mca_btl_elan_module_t* elan_btl = (mca_btl_elan_module_t*) btl; + mca_btl_elan_frag_t* frag = (mca_btl_elan_frag_t*) des; + int peer = endpoint->elan_vp; + mca_btl_base_segment_t* src = des->des_src; + mca_btl_base_segment_t* dst = des->des_dst; + unsigned char* src_addr = (unsigned char*)src->seg_addr.pval; + size_t src_len = src->seg_len; + unsigned char* dst_addr = (unsigned char*)ompi_ptr_ltop(dst->seg_addr.lval); + /*size_t dst_len = dst->seg_len;*/ + bufdesc_t * desc = (bufdesc_t * )malloc (sizeof(struct bufdesc_t)); + frag->endpoint = endpoint; + frag->btl = elan_btl; + frag->type = MCA_BTL_ELAN_HDR_TYPE_PUT; + desc->eve = elan_put(elan_btl->state, src_addr, dst_addr, src_len, peer); + desc->frag = frag; + desc->next = NULL; + + BTL_ELAN_ADD_TO_FIFO( elan_btl, desc ); + + return OMPI_SUCCESS; +} + + +/** + * Initiate an asynchronous get. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + * + */ + +int mca_btl_elan_get( mca_btl_base_module_t* btl, + mca_btl_base_endpoint_t* endpoint, + mca_btl_base_descriptor_t* descriptor ) +{ + /* mca_btl_elan_module_t* elan_btl = (mca_btl_elan_module_t*) btl; */ + mca_btl_elan_frag_t* frag = (mca_btl_elan_frag_t*) descriptor; + frag->endpoint = endpoint; + /* TODO */ + return OMPI_ERR_NOT_IMPLEMENTED; +} + + +/* + * Cleanup/release module resources. 
+ */ + +void cancel_elanRx(mca_btl_elan_module_t* elan_btl) +{ + bufdesc_t * index = elan_btl->tportFIFOHead; + int i=0; + while(index!= NULL) + { + if(index->frag->type == MCA_BTL_ELAN_HDR_TYPE_RECV) + { + if(elan_tportRxCancel(index->eve)) + { + i++; + MCA_BTL_TEMPLATE_FRAG_RETURN(index->frag); + } + } + index = index->next; + } + opal_output(0, "cancel no. is %d\n", i); +} + +int mca_btl_elan_finalize( struct mca_btl_base_module_t* btl ) +{ + mca_btl_elan_module_t* elan_btl = (mca_btl_elan_module_t*) btl; + OBJ_DESTRUCT(&elan_btl->elan_lock); + cancel_elanRx(elan_btl); + free(elan_btl); + return OMPI_SUCCESS; +} + +int mca_btl_elan_ft_event(int state) { + if(OPAL_CRS_CHECKPOINT == state) { + ; + } + else if(OPAL_CRS_CONTINUE == state) { + ; + } + else if(OPAL_CRS_RESTART == state) { + ; + } + else if(OPAL_CRS_TERM == state ) { + ; + } + else { + ; + } + + return OMPI_SUCCESS; +} + + +bufdesc_t * elan_ipeek(mca_btl_elan_module_t* elan_btl) +{ + bufdesc_t * desc = elan_btl->tportFIFOHead; + + if( NULL == desc ) + return NULL; + if( MCA_BTL_ELAN_HDR_TYPE_RECV == desc->frag->type ) { + if( !elan_tportRxDone(desc->eve) ) + return NULL; + } else if( MCA_BTL_ELAN_HDR_TYPE_SEND == desc->frag->type ) { + if( !elan_tportTxDone(desc->eve) ) + return NULL; + } else { + if( !elan_done(desc->eve,0) ) + return NULL; + } + + elan_btl->tportFIFOHead = elan_btl->tportFIFOHead->next; + if( NULL == elan_btl->tportFIFOHead ) + elan_btl->tportFIFOTail = NULL; + return desc; +} diff --git a/ompi/mca/btl/elan/btl_elan.h b/ompi/mca/btl/elan/btl_elan.h new file mode 100644 index 0000000000..e259c2b3f8 --- /dev/null +++ b/ompi/mca/btl/elan/btl_elan.h @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + */ +#ifndef MCA_PTL_ELAN_H +#define MCA_PTL_ELAN_H + +#include "ompi_config.h" + + +/* Standard system includes */ +#include +#include +#include + +/* Open MPI includes */ +#include "ompi/class/ompi_free_list.h" +#include "ompi/class/ompi_bitmap.h" +#include "orte/class/orte_pointer_array.h" +#include "opal/event/event.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/btl/btl.h" +#include "opal/util/output.h" +#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/btl/base/btl_base_error.h" + +#include "ompi/mca/btl/btl.h" +#include "ompi/mca/btl/base/base.h" +#include "btl_elan_endpoint.h" +#include "btl_elan_frag.h" + +#include "elan3/elan3.h" +#include "elan/elan.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +#define MCA_BTL_HAS_MPOOL 1 +#define BTL_ELAN_RECV_MASK 0xffffffffULL +#define BTL_ELAN_PUT_MASK 0xffffffffULL +/** + * ELAN BTL component. + */ + +struct mca_btl_elan_component_t { + mca_btl_base_component_1_0_1_t super; /**< base BTL component */ + + uint32_t ib_max_btls; + /**< maximum number of hcas available to the ELAN component */ + + uint32_t elan_num_btls; + /**< number of hcas available to the ELAN component */ + + struct mca_btl_elan_module_t **elan_btls; + /**< array of available BTL modules */ + + int elan_free_list_num; + /**< initial size of free lists */ + + int elan_free_list_max; + /**< maximum size of free lists */ + + int elan_free_list_inc; + /**< number of elements to alloc when growing free lists */ + + + /* free list of fragment descriptors */ + ompi_free_list_t elan_frag_eager; + ompi_free_list_t elan_frag_max; + ompi_free_list_t elan_frag_user; + + + opal_list_t elan_procs; + /**< list of elan proc structures */ + + opal_mutex_t elan_lock; + /**< lock for accessing module state */ + + + char* elan_mpool_name; + /**< name of memory pool */ + + bool leave_pinned; + /**< pin memory on first use and leave 
pinned */ + +}; +typedef struct mca_btl_elan_component_t mca_btl_elan_component_t; + +OMPI_MODULE_DECLSPEC extern mca_btl_elan_component_t mca_btl_elan_component; + + +/** + * BTL Module Interface + */ + +struct mca_btl_elan_module_t { + mca_btl_base_module_t super; /**< base BTL interface */ + mca_btl_base_recv_reg_t elan_reg[MCA_BTL_TAG_MAX]; + ELAN_STATE *state; + ELAN_BASE *base; + ELAN_TPORT *tport; /* What we actually use for moving messages */ + ELAN_QUEUE *queue; + ELAN_GROUP *group; /* The group with everyone in */ + unsigned int elan_vp; /**< elan vpid, not ompi vpid */ + unsigned int elan_nvp; /**< total # of elan vpid */ + opal_mutex_t elan_lock; + struct bufdesc_t * tportFIFOHead; + struct bufdesc_t * tportFIFOTail; +#if defined MCA_BTL_HAS_MPOOL + struct mca_mpool_base_module_t* elan_mpool; +#endif +}; +typedef struct mca_btl_elan_module_t mca_btl_elan_module_t; +extern mca_btl_elan_module_t mca_btl_elan_module; + +struct bufdesc_t { + ELAN_EVENT * eve; + struct mca_btl_elan_frag_t * frag; + struct bufdesc_t * next; +}; +typedef struct bufdesc_t bufdesc_t; + + +/** + * Register ELAN component parameters with the MCA framework + */ +extern int mca_btl_elan_component_open(void); + +/** + * Any final cleanup before being unloaded. + */ +extern int mca_btl_elan_component_close(void); + +/** + * ELAN component initialization. + * + * @param num_btl_modules (OUT) Number of BTLs returned in BTL array. + * @param allow_multi_user_threads (OUT) Flag indicating wether BTL supports user threads (TRUE) + * @param have_hidden_threads (OUT) Flag indicating wether BTL uses threads (TRUE) + */ +extern mca_btl_base_module_t** mca_btl_elan_component_init( + int *num_btl_modules, + bool allow_multi_user_threads, + bool have_hidden_threads +); + + +/** + * ELAN component progress. + */ +extern int mca_btl_elan_component_progress(void); + + + +/** + * Cleanup any resources held by the BTL. + * + * @param btl BTL instance. 
 * @return     OMPI_SUCCESS or error status on failure.
 */

extern void cancel_elanRx(
    mca_btl_elan_module_t* elan_btl
);


extern int mca_btl_elan_finalize(
    struct mca_btl_base_module_t* btl
);

/* Fault-tolerance event notification; takes the checkpoint/restart state. */
extern int mca_btl_elan_ft_event(int state);

/**
 * PML->BTL notification of change in the process list.
 *
 * @param btl (IN)            BTL instance
 * @param nprocs (IN)         Number of processes
 * @param procs (IN)          Set of processes
 * @param peers (OUT)         Set of (optional) peer addressing info.
 * @param reachable (IN/OUT)  Set of processes that are reachable via this BTL.
 * @return     OMPI_SUCCESS or error status on failure.
 *
 */

extern int mca_btl_elan_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_btl_base_endpoint_t** peers,
    ompi_bitmap_t* reachable
);

/**
 * PML->BTL notification of change in the process list.
 *
 * @param btl (IN)     BTL instance
 * @param nprocs (IN)  Number of processes.
 * @param procs (IN)   Set of processes.
 * @param peers (IN)   Set of peer data structures.
 * @return             Status indicating if cleanup was successful
 *
 */

extern int mca_btl_elan_del_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_btl_base_endpoint_t** peers
);


/**
 * Initiate an asynchronous send.
 *
 * @param btl (IN)         BTL module
 * @param btl_peer (IN)    BTL addressing information
 * @param descriptor (IN)  Description of the data to be transferred
 * @param tag (IN)         The tag value used to notify the peer.
 */

extern int mca_btl_elan_send(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* btl_peer,
    struct mca_btl_base_descriptor_t* descriptor,
    mca_btl_base_tag_t tag
);


/**
 * Initiate an asynchronous put.
 *
 * @param btl (IN)         BTL module
 * @param btl_peer (IN)    BTL addressing information
 * @param decriptor (IN)   Description of the data to be transferred
 *                         (parameter name is spelled this way in the prototype)
 */

extern int mca_btl_elan_put(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* btl_peer,
    struct mca_btl_base_descriptor_t* decriptor
);


/**
 * Initiate an asynchronous get.
 *
 * @param btl (IN)         BTL module
 * @param btl_peer (IN)    BTL addressing information
 * @param decriptor (IN)   Description of the data to be transferred
 *                         (parameter name is spelled this way in the prototype)
 */

extern int mca_btl_elan_get(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* btl_peer,
    struct mca_btl_base_descriptor_t* decriptor
);

/**
 * Register a callback function that is called on receipt
 * of a fragment.
 *
 * @param btl (IN)     BTL module
 * @param tag (IN)     Tag the callback is registered for
 * @param cbfunc (IN)  Callback invoked for fragments received with that tag
 * @param cbdata (IN)  Opaque pointer passed back to the callback
 * @return             Status indicating if registration was successful
 *
 */

extern int mca_btl_elan_register(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_tag_t tag,
    mca_btl_base_module_recv_cb_fn_t cbfunc,
    void* cbdata);

/**
 * Allocate a descriptor with a segment of the requested size.
 * Note that the BTL layer may choose to return a smaller size
 * if it cannot support the request.
 *
 * @param btl (IN)      BTL module
 * @param size (IN)     Request segment size.
 */

extern mca_btl_base_descriptor_t* mca_btl_elan_alloc(
    struct mca_btl_base_module_t* btl,
    uint8_t order,
    size_t size);


/**
 * Return a segment allocated by this BTL.
 *
 * @param btl (IN)      BTL module
 * @param des (IN)      Allocated descriptor.
 */

extern int mca_btl_elan_free(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_descriptor_t* des);


/**
 * Prepare a descriptor for send/rdma using the supplied
 * convertor. If the convertor references data that is contiguous,
 * the descriptor may simply point to the user buffer. Otherwise,
 * this routine is responsible for allocating buffer space and
 * packing if required.
+ * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL peer addressing + * @param convertor (IN) Data type convertor + * @param reserve (IN) Additional bytes requested by upper layer to precede user data + * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) +*/ + +mca_btl_base_descriptor_t* mca_btl_elan_prepare_src( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* peer, + struct mca_mpool_base_registration_t*, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size +); + +extern mca_btl_base_descriptor_t* mca_btl_elan_prepare_dst( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* peer, + struct mca_mpool_base_registration_t*, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size); + + +extern bufdesc_t * elan_ipeek(mca_btl_elan_module_t* elan_btl); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#define BTL_ELAN_ADD_TO_FIFO(BTL, DESC) \ + do { \ + OPAL_THREAD_LOCK(&elan_btl->elan_lock); \ + if( (BTL)->tportFIFOTail ) { \ + (BTL)->tportFIFOTail->next = (DESC); \ + (BTL)->tportFIFOTail = (DESC); \ + } else { \ + (BTL)->tportFIFOHead = (DESC); \ + (BTL)->tportFIFOTail = (DESC); \ + } \ + OPAL_THREAD_UNLOCK(&elan_btl->elan_lock); \ + } while(0) + +#endif diff --git a/ompi/mca/btl/elan/btl_elan_component.c b/ompi/mca/btl/elan/btl_elan_component.c new file mode 100644 index 0000000000..2eaaec7ffa --- /dev/null +++ b/ompi/mca/btl/elan/btl_elan_component.c @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "ompi/constants.h" +#include "opal/event/event.h" +#include "opal/util/if.h" +#include "opal/util/argv.h" +#include "opal/util/output.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/btl/btl.h" + +#include "opal/mca/base/mca_base_param.h" +#include "ompi/runtime/ompi_module_exchange.h" +#include "orte/mca/errmgr/errmgr.h" +#include "ompi/mca/mpool/base/base.h" +#include "btl_elan.h" +#include "btl_elan_frag.h" +#include "btl_elan_endpoint.h" + +#include "ompi/mca/btl/base/base.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/datatype/convertor.h" + +#include "elan/elan.h" + +#include "opal/util/os_path.h" +#include "opal/util/opal_environ.h" + +mca_btl_elan_component_t mca_btl_elan_component = { + { + /* First, the mca_base_component_t struct containing meta information + about the component itself */ + + { + /* Indicate that we are a pml v1.0.0 component (which also implies a + specific MCA version) */ + MCA_BTL_BASE_VERSION_1_0_1, + "elan", /* MCA component name */ + OMPI_MAJOR_VERSION, /* MCA component major version */ + OMPI_MINOR_VERSION, /* MCA component minor version */ + OMPI_RELEASE_VERSION, /* MCA component release version */ + mca_btl_elan_component_open, /* component open */ + mca_btl_elan_component_close /* component close */ + }, + + /* Next the MCA v1.0.0 component meta data */ + + { + /* Whether the component is checkpointable or not */ + false + }, + + mca_btl_elan_component_init, + mca_btl_elan_component_progress, + } +}; + + +/* + * utility routines for parameter registration + */ + +static inline char* mca_btl_elan_param_register_string( + const char* param_name, + const char* default_value) +{ + char *param_value; + int id = mca_base_param_register_string("btl","elan",param_name,NULL,default_value); + mca_base_param_lookup_string(id, ¶m_value); + return param_value; +} + +static inline int 
mca_btl_elan_param_register_int( + const char* param_name, + int default_value) +{ + int id = mca_base_param_register_int("btl","elan",param_name,NULL,default_value); + int param_value = default_value; + mca_base_param_lookup_int(id,¶m_value); + return param_value; +} + +/* + * Called by MCA framework to open the component, registers + * component parameters. + */ + +int mca_btl_elan_component_open(void) +{ + /* initialize state */ + mca_btl_elan_component.elan_num_btls=0; + mca_btl_elan_component.elan_btls=NULL; + + /* register Elan4 component parameters */ + mca_btl_elan_component.elan_free_list_num = + mca_btl_elan_param_register_int ("free_list_num", 8); + mca_btl_elan_component.elan_free_list_max = + mca_btl_elan_param_register_int ("free_list_max", 128); + mca_btl_elan_component.elan_free_list_inc = + mca_btl_elan_param_register_int ("free_list_inc", 32); + mca_btl_elan_component.elan_mpool_name = + mca_btl_elan_param_register_string("mpool", "elan"); + mca_btl_elan_module.super.btl_exclusivity = 0; + mca_btl_elan_module.super.btl_eager_limit = 32*1024; + mca_btl_elan_module.super.btl_min_send_size = 32*1024; + mca_btl_elan_module.super.btl_max_send_size = 32*1024; /*64*1024;*/ + mca_btl_elan_module.super.btl_rdma_pipeline_send_length = 512 *1024; + mca_btl_elan_module.super.btl_rdma_pipeline_frag_size = 128 *1024; + mca_btl_elan_module.super.btl_min_rdma_pipeline_size = 128* 1024; + mca_btl_elan_module.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | /*MCA_BTL_FLAGS_PUT | */MCA_BTL_FLAGS_SEND; + mca_btl_elan_module.super.btl_bandwidth = 2000; + mca_btl_elan_module.super.btl_latency = 5; + mca_btl_base_param_register(&mca_btl_elan_component.super.btl_version, + &mca_btl_elan_module.super); + + return OMPI_SUCCESS; +} + +/* + * component cleanup - sanity checking of queue lengths + */ + +int mca_btl_elan_component_close(void) +{ + if( NULL != mca_btl_elan_component.elan_btls ) + free( mca_btl_elan_component.elan_btls ); + /* release resources */ + + 
OBJ_DESTRUCT(&mca_btl_elan_component.elan_procs); + OBJ_DESTRUCT(&mca_btl_elan_component.elan_frag_eager); + OBJ_DESTRUCT(&mca_btl_elan_component.elan_frag_user); + OBJ_DESTRUCT(&mca_btl_elan_component.elan_frag_max); + OBJ_DESTRUCT(&mca_btl_elan_component.elan_lock); + + return OMPI_SUCCESS; +} + +/* + * Elan4 component initialization: + * (1) read interface list from kernel and compare against component parameters + * then create a BTL instance for selected interfaces + * (2) setup Elan4 listen socket for incoming connection attempts + * (3) register BTL parameters with the MCA + */ +mca_btl_base_module_t** mca_btl_elan_component_init( int *num_btl_modules, + bool enable_progress_threads, + bool enable_mpi_threads ) +{ + + mca_btl_base_module_t** btls; + size_t rails, i , count; + ELAN_BASE * base; + ELAN_STATE * state; + ELAN_QUEUE * q= NULL; + ELAN_TPORT * p= NULL; + + *num_btl_modules = 0; + if (enable_progress_threads) { + ompi_modex_send(&mca_btl_elan_component.super.btl_version, NULL, 0); + return NULL; + } + OBJ_CONSTRUCT (&mca_btl_elan_component.elan_lock, opal_mutex_t); + OBJ_CONSTRUCT (&mca_btl_elan_component.elan_frag_eager, ompi_free_list_t); + OBJ_CONSTRUCT (&mca_btl_elan_component.elan_frag_max, ompi_free_list_t); + OBJ_CONSTRUCT (&mca_btl_elan_component.elan_frag_user, ompi_free_list_t); + OBJ_CONSTRUCT(&mca_btl_elan_component.elan_procs, opal_list_t); + ompi_free_list_init( &mca_btl_elan_component.elan_frag_eager, + sizeof(mca_btl_elan_frag_t) + mca_btl_elan_module.super.btl_eager_limit, + OBJ_CLASS(mca_btl_elan_frag_t), + mca_btl_elan_component.elan_free_list_num, + mca_btl_elan_component.elan_free_list_max, + mca_btl_elan_component.elan_free_list_inc, + NULL ); /* use default allocator */ + + ompi_free_list_init( &mca_btl_elan_component.elan_frag_user, + sizeof(mca_btl_elan_frag_t), + OBJ_CLASS(mca_btl_elan_frag_t), + mca_btl_elan_component.elan_free_list_num, + mca_btl_elan_component.elan_free_list_max, + 
mca_btl_elan_component.elan_free_list_inc, + NULL ); /* use default allocator */ + + ompi_free_list_init( &mca_btl_elan_component.elan_frag_max, + sizeof(mca_btl_elan_frag_t)+mca_btl_elan_module.super.btl_max_send_size, + OBJ_CLASS(mca_btl_elan_frag_t), + mca_btl_elan_component.elan_free_list_num, + mca_btl_elan_component.elan_free_list_max, + mca_btl_elan_component.elan_free_list_inc, + NULL ); /* use default allocator */ + + opal_setenv( "LIBELAN_MACHINES_FILE", "/home/tma/machinefile", false, &environ ); + opal_setenv( "MPIRUN_ELANIDMAP_FILE", "/etc/elanidmap", false, &environ ); + base = elan_baseInit(0); + if (base == NULL) + return NULL; + state = base->state; + if( NULL == state ) { + mca_btl_base_error_no_nics( "ELAN", "Quadrics" ); + return NULL; + } + elan_gsync(base->allGroup); + ompi_modex_send( &mca_btl_elan_component.super.btl_version, &state->vp, sizeof(state->vp)); + rails = elan_nRails(state); + mca_btl_elan_component.elan_num_btls = rails; + if ((q = elan_allocQueue(base->state)) == NULL) { + return NULL; + } + if (!(p = elan_tportInit(base->state, + (ELAN_QUEUE *)q, + base->tport_nslots, + base->tport_smallmsg, + base->tport_bigmsg, + base->tport_stripemsg, + ELAN_POLL_EVENT, + base->retryCount, + &base->shm_key, + base->shm_fifodepth, + base->shm_fragsize, + 0))) { + return NULL; + } + mca_btl_elan_component.elan_btls = malloc( (mca_btl_elan_component.elan_num_btls) * sizeof(mca_btl_base_module_t*) ); + for( i = count = 0; i < mca_btl_elan_component.elan_num_btls; i++ ) { + mca_btl_elan_module_t* btl = malloc (sizeof (mca_btl_elan_module_t)); + if(NULL == btl) + continue; + memcpy( btl, &mca_btl_elan_module, sizeof(mca_btl_elan_module_t) ); + OBJ_CONSTRUCT (&btl->elan_lock, opal_mutex_t); + btl->tportFIFOHead=NULL; + btl->tportFIFOTail=NULL; + btl->base = base; + btl->state = state; + btl->queue = q; + btl->tport = p; + btl->elan_vp = state->vp; + btl->elan_nvp = state->nvp; + mca_btl_elan_component.elan_btls[count++] = btl; + } + 
mca_btl_elan_component.elan_num_btls = count ; + btls = (mca_btl_base_module_t**)malloc( (mca_btl_elan_component.elan_num_btls) * sizeof(mca_btl_base_module_t*) ); + if( NULL == btls ) { + free( mca_btl_elan_component.elan_btls ); + mca_btl_elan_component.elan_num_btls = 0; /* no active BTL modules */ + return NULL; + } + memcpy( btls, mca_btl_elan_component.elan_btls, mca_btl_elan_component.elan_num_btls *sizeof(mca_btl_elan_module_t*) ); + *num_btl_modules = mca_btl_elan_component.elan_num_btls; + return btls; +} + +/* + * Elan4 component progress. + */ + + + + +int mca_btl_elan_component_progress() +{ + size_t num_progressed = 0, i, no_btls, size; + mca_btl_elan_frag_t* frag; + bufdesc_t* desc; + no_btls = mca_btl_elan_component.elan_num_btls; + for (i = 0; i < no_btls; i++) { + mca_btl_elan_module_t* elan_btl = mca_btl_elan_component.elan_btls[i]; + OPAL_THREAD_LOCK(&elan_btl->elan_lock); + desc = elan_ipeek(elan_btl); + OPAL_THREAD_UNLOCK(&elan_btl->elan_lock); + if(desc ==NULL) + continue; + frag = (mca_btl_elan_frag_t*) desc->frag; + if(frag!=NULL) + { + if(frag->type== MCA_BTL_ELAN_HDR_TYPE_SEND ) + { + /* it's a send */ + /* call the completion callback */ + elan_tportTxWait(desc->eve); + frag->base.des_cbfunc( &(elan_btl->super), frag->endpoint, &(frag->base), OMPI_SUCCESS ); + free(desc); + + } + else if(frag->type== MCA_BTL_ELAN_HDR_TYPE_PUT || frag->type== MCA_BTL_ELAN_HDR_TYPE_GET ) + { + /* it's a put*/ + /* call the completion callback */ + elan_wait(desc->eve,ELAN_WAIT_EVENT); + frag->base.des_cbfunc( &(elan_btl->super), frag->endpoint, &(frag->base), OMPI_SUCCESS ); + free(desc); + } + else{ + /* and this one is a receive */ + mca_btl_base_recv_reg_t* reg; + reg = &(elan_btl->elan_reg[frag->tag]); + elan_tportRxWait(desc->eve, NULL, NULL, &size); + frag->base.des_dst->seg_len = size; + reg->cbfunc( &(elan_btl->super), frag->tag, &(frag->base),reg->cbdata ); + /** + * The upper level extract the data from the fragment. 
+ * Now we can register the fragment + * again with the elan BTL. + */ + desc->eve = elan_tportRxStart (elan_btl->tport, 0 , 0, 0, 0xffffffff, frag->tag, frag->base.des_dst->seg_addr.pval, mca_btl_elan_module.super.btl_eager_limit) ; + /*desc->eve = elan_tportRxStart (elan_btl->tport, ELAN_TPORT_RXANY , 0, 0, 0, 0, frag->base.des_dst->seg_addr.pval, mca_btl_elan_module.super.btl_eager_limit) ;*/ + + desc->frag = frag; + desc->next = NULL; + BTL_ELAN_ADD_TO_FIFO( elan_btl, desc ); + } + } else { + opal_output( 0, "Something bad happened the frag == NULL\n" ); + } + num_progressed++; + + } + + return num_progressed; +} + + diff --git a/ompi/mca/btl/elan/btl_elan_endpoint.c b/ompi/mca/btl/elan/btl_elan_endpoint.c new file mode 100644 index 0000000000..923baaf4a5 --- /dev/null +++ b/ompi/mca/btl/elan/btl_elan_endpoint.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include +#include +#include "ompi/types.h" +#include "orte/mca/ns/base/base.h" +#include "orte/mca/oob/base/base.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/dss/dss.h" +#include "btl_elan.h" +#include "btl_elan_endpoint.h" +#include "btl_elan_proc.h" +#include "btl_elan_frag.h" + + +/* + * Initialize state of the endpoint instance. 
+ * + */ + +static void mca_btl_elan_endpoint_construct(mca_btl_base_endpoint_t* endpoint) +{ + endpoint->endpoint_btl = NULL; + endpoint->endpoint_proc = NULL; +} + +/* + * Destroy a endpoint + * + */ + +static void mca_btl_elan_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) +{ +} + + +OBJ_CLASS_INSTANCE( + mca_btl_elan_endpoint_t, + opal_list_item_t, + mca_btl_elan_endpoint_construct, + mca_btl_elan_endpoint_destruct); + diff --git a/ompi/mca/btl/elan/btl_elan_endpoint.h b/ompi/mca/btl/elan/btl_elan_endpoint.h new file mode 100644 index 0000000000..8032a2365d --- /dev/null +++ b/ompi/mca/btl/elan/btl_elan_endpoint.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_TEMPLATE_ENDPOINT_H +#define MCA_BTL_TEMPLATE_ENDPOINT_H + +#include "opal/class/opal_list.h" +#include "opal/event/event.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/btl/btl.h" +#include "btl_elan_frag.h" +#include "btl_elan.h" +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + + +/** + * State of ELAN endpoint connection. + */ + +/** + * An abstraction that represents a connection to a endpoint process. + * An instance of mca_btl_base_endpoint_t is associated w/ each process + * and BTL pair at startup. 
However, connections to the endpoint + * are established dynamically on an as-needed basis: + */ + +struct mca_btl_base_endpoint_t { + opal_list_item_t super; + + struct mca_btl_elan_module_t* endpoint_btl; + /**< BTL instance that created this connection */ + + struct mca_btl_elan_proc_t* endpoint_proc; + /**< proc structure corresponding to endpoint */ + + unsigned int elan_vp; + +}; + +typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t; +typedef mca_btl_base_endpoint_t mca_btl_elan_endpoint_t; +OBJ_CLASS_DECLARATION(mca_btl_elan_endpoint_t); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff --git a/ompi/mca/btl/elan/btl_elan_frag.c b/ompi/mca/btl/elan/btl_elan_frag.c new file mode 100644 index 0000000000..97101d44f5 --- /dev/null +++ b/ompi/mca/btl/elan/btl_elan_frag.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "btl_elan_frag.h" + +static void mca_btl_elan_frag_common_constructor(mca_btl_elan_frag_t* frag) +{ + frag->base.des_src = NULL; + frag->base.des_src_cnt = 0; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; +} + +static void mca_btl_elan_frag_eager_constructor(mca_btl_elan_frag_t* frag) +{ + frag->registration = NULL; + frag->size = mca_btl_elan_module.super.btl_eager_limit; + mca_btl_elan_frag_common_constructor(frag); +} + +static void mca_btl_elan_frag_max_constructor(mca_btl_elan_frag_t* frag) +{ + frag->registration = NULL; + frag->size = mca_btl_elan_module.super.btl_max_send_size; + mca_btl_elan_frag_common_constructor(frag); +} + +static void mca_btl_elan_frag_user_constructor(mca_btl_elan_frag_t* frag) +{ + frag->size = 0; + mca_btl_elan_frag_common_constructor(frag); +} + + +OBJ_CLASS_INSTANCE( + mca_btl_elan_frag_t, + mca_btl_base_descriptor_t, + NULL, + NULL); + +OBJ_CLASS_INSTANCE( + 
mca_btl_elan_frag_eager_t, + mca_btl_base_descriptor_t, + mca_btl_elan_frag_eager_constructor, + NULL); + +OBJ_CLASS_INSTANCE( + mca_btl_elan_frag_max_t, + mca_btl_base_descriptor_t, + mca_btl_elan_frag_max_constructor, + NULL); + +OBJ_CLASS_INSTANCE( + mca_btl_elan_frag_user_t, + mca_btl_base_descriptor_t, + mca_btl_elan_frag_user_constructor, + NULL); + diff --git a/ompi/mca/btl/elan/btl_elan_frag.h b/ompi/mca/btl/elan/btl_elan_frag.h new file mode 100644 index 0000000000..90776a5894 --- /dev/null +++ b/ompi/mca/btl/elan/btl_elan_frag.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_TEMPLATE_FRAG_H +#define MCA_BTL_TEMPLATE_FRAG_H + + +#define MCA_BTL_TEMPLATE_FRAG_ALIGN (8) +#include "ompi_config.h" +#include "btl_elan.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +#define MCA_BTL_ELAN_HDR_TYPE_SEND 1 +#define MCA_BTL_ELAN_HDR_TYPE_PUT 2 +#define MCA_BTL_ELAN_HDR_TYPE_GET 3 +#define MCA_BTL_ELAN_HDR_TYPE_MATCH 4 +#define MCA_BTL_ELAN_HDR_TYPE_FRAG 5 +#define MCA_BTL_ELAN_HDR_TYPE_ACK 6 +#define MCA_BTL_ELAN_HDR_TYPE_NACK 7 +#define MCA_BTL_ELAN_HDR_TYPE_FIN 8 +#define MCA_BTL_ELAN_HDR_TYPE_FIN_ACK 9 +#define MCA_BTL_ELAN_HDR_TYPE_RECV 10 + + +/** + * TEMPLATE send fraelanent derived type. 
+ */ +struct mca_btl_elan_frag_t { + mca_btl_base_descriptor_t base; + mca_btl_base_segment_t segment; + struct mca_btl_base_endpoint_t *endpoint; + struct mca_btl_elan_module_t* btl; + int type; + ompi_free_list_t* my_list; + mca_btl_base_tag_t tag; + size_t size; +#if defined MCA_BTL_HAS_MPOOL + struct mca_mpool_base_registration_t* registration; +#endif +}; +typedef struct mca_btl_elan_frag_t mca_btl_elan_frag_t; +OBJ_CLASS_DECLARATION(mca_btl_elan_frag_t); + +typedef struct mca_btl_elan_frag_t mca_btl_elan_frag_eager_t; + +OBJ_CLASS_DECLARATION(mca_btl_elan_frag_eager_t); + +typedef struct mca_btl_elan_frag_t mca_btl_elan_frag_max_t; + +OBJ_CLASS_DECLARATION(mca_btl_elan_frag_max_t); + +typedef struct mca_btl_elan_frag_t mca_btl_elan_frag_user_t; + +OBJ_CLASS_DECLARATION(mca_btl_elan_frag_user_t); + + +/* + * Macros to allocate/return descriptors from module specific + * free list(s). + */ + +#define MCA_BTL_TEMPLATE_FRAG_ALLOC_EAGER(frag, rc) \ +{ \ + ompi_free_list_item_t *item; \ + OMPI_FREE_LIST_WAIT(&mca_btl_elan_component.elan_frag_eager, item, rc); \ + frag = (mca_btl_elan_frag_t*) item; \ + frag->segment.seg_addr.pval = (void*)(frag+1); \ + frag->my_list = &mca_btl_elan_component.elan_frag_eager; \ +} + + +#define MCA_BTL_TEMPLATE_FRAG_ALLOC_MAX(frag, rc) \ +{ \ + ompi_free_list_item_t *item; \ + OMPI_FREE_LIST_WAIT(&mca_btl_elan_component.elan_frag_max, item, rc); \ + frag = (mca_btl_elan_frag_t*) item; \ + frag->segment.seg_addr.pval = (void*)(frag+1); \ + frag->my_list = &mca_btl_elan_component.elan_frag_max; \ +} + + +#define MCA_BTL_TEMPLATE_FRAG_ALLOC_USER(frag, rc) \ +{ \ + ompi_free_list_item_t *item; \ + OMPI_FREE_LIST_WAIT(&mca_btl_elan_component.elan_frag_user, item, rc); \ + frag = (mca_btl_elan_frag_t*) item; \ + frag->my_list = &mca_btl_elan_component.elan_frag_user; \ +} + +#define MCA_BTL_TEMPLATE_FRAG_RETURN(frag) \ +{ \ + OMPI_FREE_LIST_RETURN(frag->my_list, \ + (ompi_free_list_item_t*)(frag)); \ +} + + +#if defined(c_plusplus) || 
defined(__cplusplus) +} +#endif +#endif diff --git a/ompi/mca/btl/elan/btl_elan_proc.c b/ompi/mca/btl/elan/btl_elan_proc.c new file mode 100644 index 0000000000..5d0ef4388c --- /dev/null +++ b/ompi/mca/btl/elan/btl_elan_proc.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "orte/class/orte_proc_table.h" +#include "opal/class/opal_hash_table.h" +#include "ompi/runtime/ompi_module_exchange.h" + +#include "btl_elan.h" +#include "btl_elan_proc.h" + +static void mca_btl_elan_proc_construct(mca_btl_elan_proc_t* proc); +static void mca_btl_elan_proc_destruct(mca_btl_elan_proc_t* proc); + +OBJ_CLASS_INSTANCE(mca_btl_elan_proc_t, + opal_list_item_t, mca_btl_elan_proc_construct, + mca_btl_elan_proc_destruct); + +void mca_btl_elan_proc_construct(mca_btl_elan_proc_t* proc) +{ + proc->proc_ompi = 0; + proc->proc_addr_count = 0; + proc->proc_endpoints = 0; + proc->proc_endpoint_count = 0; + OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t); + /* add to list of all proc instance */ + OPAL_THREAD_LOCK(&mca_btl_elan_component.elan_lock); + opal_list_append(&mca_btl_elan_component.elan_procs, &proc->super); + OPAL_THREAD_UNLOCK(&mca_btl_elan_component.elan_lock); +} + +/* + * Cleanup ib proc instance + */ + +void mca_btl_elan_proc_destruct(mca_btl_elan_proc_t* proc) +{ + /* remove from list of all proc instances */ + OPAL_THREAD_LOCK(&mca_btl_elan_component.elan_lock); + opal_list_remove_item(&mca_btl_elan_component.elan_procs, &proc->super); + OPAL_THREAD_UNLOCK(&mca_btl_elan_component.elan_lock); + + /* release resources */ + if(NULL != proc->proc_endpoints) { + free(proc->proc_endpoints); + } +} + + +/* + * Look for an existing Elan process instances based on the associated + * ompi_proc_t instance. 
+ */ +static mca_btl_elan_proc_t* mca_btl_elan_proc_lookup_ompi(ompi_proc_t* ompi_proc) +{ + mca_btl_elan_proc_t* elan_proc; + + OPAL_THREAD_LOCK(&mca_btl_elan_component.elan_lock); + + for(elan_proc = (mca_btl_elan_proc_t*) + opal_list_get_first(&mca_btl_elan_component.elan_procs); + elan_proc != (mca_btl_elan_proc_t*) + opal_list_get_end(&mca_btl_elan_component.elan_procs); + elan_proc = (mca_btl_elan_proc_t*)opal_list_get_next(elan_proc)) { + + if(elan_proc->proc_ompi == ompi_proc) { + OPAL_THREAD_UNLOCK(&mca_btl_elan_component.elan_lock); + return elan_proc; + } + + } + + OPAL_THREAD_UNLOCK(&mca_btl_elan_component.elan_lock); + + return NULL; +} + +/* + * Create a TEMPLATE process structure. There is a one-to-one correspondence + * between a ompi_proc_t and a mca_btl_elan_proc_t instance. We cache + * additional data (specifically the list of mca_btl_elan_endpoint_t instances, + * and published addresses) associated w/ a given destination on this + * datastructure. + */ + +mca_btl_elan_proc_t* mca_btl_elan_proc_create(ompi_proc_t* ompi_proc) +{ + int rc; + size_t size; + mca_btl_elan_proc_t* module_proc = NULL; + + /* Check if we have already created a Elan proc + * structure for this ompi process */ + module_proc = mca_btl_elan_proc_lookup_ompi(ompi_proc); + + if(module_proc != NULL) { + /* Gotcha! */ + return module_proc; + } + + /* Oops! First time, gotta create a new Elan proc + * out of the ompi_proc ... */ + + module_proc = OBJ_NEW(mca_btl_elan_proc_t); + if(NULL == module_proc) + return NULL; + /* Initialize number of peer */ + module_proc->proc_endpoint_count = 0; + module_proc->proc_ompi = ompi_proc; + + /* build a unique identifier (of arbitrary + * size) to represent the proc */ + module_proc->proc_guid = ompi_proc->proc_name; + + /* Elan module doesn't have addresses exported at + * initialization, so the addr_count is set to one. 
*/ + + /** + orte_hash_table_set_proc( &mca_btl_elan_component.elan_procs, + &module_proc->proc_guid, + module_proc );*/ + + rc = ompi_modex_recv( &mca_btl_elan_component.super.btl_version, + ompi_proc, + (void**)&module_proc->elan_vp_array, + &size ); + if(rc != OMPI_SUCCESS) { + BTL_ERROR(("mca_base_modex_recv: failed with return value=%d", rc)); + OBJ_RELEASE(module_proc); + return NULL; + } + module_proc->proc_addr_count = size / sizeof(unsigned int);; + /* XXX: Right now, there can be only 1 peer associated + * with a proc. Needs a little bit change in + * mca_btl_elan_proc_t to allow on demand increasing of + * number of endpoints for this proc */ + + module_proc->proc_endpoints = (mca_btl_base_endpoint_t**) + malloc((1+module_proc->proc_addr_count )* sizeof(mca_btl_base_endpoint_t*)); + + if(NULL == module_proc->proc_endpoints) { + OBJ_RELEASE(module_proc); + return NULL; + } + return module_proc; +} + + +/* + * Note that this routine must be called with the lock on the process + * already held. Insert a btl instance into the proc array and assign + * it an address. + */ +int mca_btl_elan_proc_insert( mca_btl_elan_proc_t* module_proc, + mca_btl_base_endpoint_t* module_endpoint ) +{ + /* insert into endpoint array */ + size_t i; + + module_endpoint->endpoint_proc = module_proc; + module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint; + + for( i = 0; i < module_proc->proc_addr_count; i++ ) { + module_endpoint->elan_vp = module_proc->elan_vp_array[i]; + return OMPI_SUCCESS; + } + return OMPI_SUCCESS; +} diff --git a/ompi/mca/btl/elan/btl_elan_proc.h b/ompi/mca/btl/elan/btl_elan_proc.h new file mode 100644 index 0000000000..ca33a15285 --- /dev/null +++ b/ompi/mca/btl/elan/btl_elan_proc.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_TEMPLATE_PROC_H +#define MCA_BTL_TEMPLATE_PROC_H + +#include "orte/mca/ns/ns.h" +#include "opal/class/opal_object.h" +#include "ompi/proc/proc.h" +#include "btl_elan.h" +#include "btl_elan_endpoint.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/** + * Represents the state of a remote process and the set of addresses + * that it exports. Also cache an instance of mca_btl_base_endpoint_t for + * each + * BTL instance that attempts to open a connection to the process. + */ +struct mca_btl_elan_proc_t { + opal_list_item_t super; + /**< allow proc to be placed on a list */ + + ompi_proc_t *proc_ompi; + /**< pointer to corresponding ompi_proc_t */ + + orte_process_name_t proc_guid; + /**< globally unique identifier for the process */ + + unsigned int *elan_vp_array; + + size_t proc_addr_count; + /**< number of addresses published by endpoint */ + + struct mca_btl_base_endpoint_t **proc_endpoints; + /**< array of endpoints that have been created to access this proc */ + + size_t proc_endpoint_count; + /**< number of endpoints */ + + opal_mutex_t proc_lock; + /**< lock to protect against concurrent access to proc state */ +}; +typedef struct mca_btl_elan_proc_t mca_btl_elan_proc_t; +OBJ_CLASS_DECLARATION(mca_btl_elan_proc_t); + +mca_btl_elan_proc_t* mca_btl_elan_proc_create(ompi_proc_t* ompi_proc); +int mca_btl_elan_proc_insert(mca_btl_elan_proc_t*, mca_btl_base_endpoint_t*); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff --git a/ompi/mca/btl/elan/configure.m4 b/ompi/mca/btl/elan/configure.m4 new file mode 100644 index 0000000000..d08f5e2e2e --- /dev/null +++ b/ompi/mca/btl/elan/configure.m4 @@ -0,0 +1,82 @@ +# -*- shell-script -*- +# +# Copyright (c) 200-2007 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
# OMPI_CHECK_ELAN(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# check if Elan support can be found.  sets prefix_{CPPFLAGS,
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
# support, otherwise executes action-if-not-found
#
# Two packages are probed in sequence, and the second probe only runs when
# the first one succeeded: elan/elan.h with elan_init in libelan, then
# elan3/elan3.h with elan3_fini_neterr_svc in libelan3 linked with -lelan.
# Passing --with-elan=no skips both probes.  When --with-elan was given with
# any other value and the probes fail, configure aborts.
#
# NOTE(review): the help strings advertise QsNet2 while the second probe
# looks for the elan3 library -- confirm this is the intended dependency.
AC_DEFUN([OMPI_CHECK_ELAN],[
    AC_ARG_WITH([elan],
        [AC_HELP_STRING([--with-elan(=DIR)],
             [Build Elan (QsNet2) support, searching for libraries in DIR])])
    AC_ARG_WITH([elan-libdir],
        [AC_HELP_STRING([--with-elan-libdir=DIR],
             [Search for Elan (QsNet2) libraries in DIR])])

    AS_IF([test "$with_elan" != "no"],
          [AS_IF([test ! -z "$with_elan" -a "$with_elan" != "yes"],
                 [ompi_check_elan_dir="$with_elan"])
           AS_IF([test ! -z "$with_elan_libdir" -a "$with_elan_libdir" != "yes"],
                 [ompi_check_elan_libdir="$with_elan_libdir"])

           OMPI_CHECK_PACKAGE([$1],
                              [elan/elan.h],
                              [elan],
                              [elan_init],
                              [],
                              [$ompi_check_elan_dir],
                              [$ompi_check_elan_libdir],
                              [ompi_check_elan_happy="yes"],
                              [ompi_check_elan_happy="no"])
           AS_IF([test "$ompi_check_elan_happy" = "yes"],
                 [OMPI_CHECK_PACKAGE([$1],
                                     [elan3/elan3.h],
                                     [elan3],
                                     [elan3_fini_neterr_svc],
                                     [-lelan],
                                     [$ompi_check_elan_dir],
                                     [$ompi_check_elan_libdir],
                                     [ompi_check_elan_happy="yes"],
                                     [ompi_check_elan_happy="no"])]
                 )
          ],
          [ompi_check_elan_happy="no"])

    AS_IF([test "$ompi_check_elan_happy" = "yes"],
          [$2],
          [AS_IF([test ! -z "$with_elan" -a "$with_elan" != "no"],
                 [AC_MSG_ERROR([Elan (QsNet2) support requested but not found. Aborting])])
           $3])
    ])

# MCA_btl_elan_CONFIG([action-if-can-compile],
#                     [action-if-cant-compile])
# ------------------------------------------------
# Runs OMPI_CHECK_ELAN with the btl_elan prefix.  On success the link flags
# and libraries found by the check are copied into the WRAPPER_EXTRA
# variables before action-if-can-compile runs.  The four btl_elan_*
# variables are substituted in either case.
#
# NOTE(review): btl_elan_CFLAGS is substituted but nothing visible here
# assigns it.
AC_DEFUN([MCA_btl_elan_CONFIG],[
    OMPI_CHECK_ELAN([btl_elan],
                    [btl_elan_happy="yes"],
                    [btl_elan_happy="no"])

    AS_IF([test "$btl_elan_happy" = "yes"],
          [btl_elan_WRAPPER_EXTRA_LDFLAGS="$btl_elan_LDFLAGS"
           btl_elan_WRAPPER_EXTRA_LIBS="$btl_elan_LIBS"
           $1],
          [$2])

    # substitute in the things needed to build elan
    AC_SUBST([btl_elan_CFLAGS])
    AC_SUBST([btl_elan_CPPFLAGS])
    AC_SUBST([btl_elan_LDFLAGS])
    AC_SUBST([btl_elan_LIBS])
])dnl

# ---- configure.params ----
# Specific to this module

PARAM_CONFIG_FILES="Makefile"