The latest and greatest Elan improvements.
This commit was SVN r17361.
Этот коммит содержится в:
родитель
982acaa2c9
Коммит
3a6d2e3894
@ -82,18 +82,18 @@ static int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl,
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
elan_btl->base = base;
|
||||
|
||||
/* Create the global queue */
|
||||
if( (elan_btl->global_queue = elan_gallocQueue(base, base->allGroup)) == NULL ) {
|
||||
elan_btl->elan_vp = base->state->vp;
|
||||
/* Create the tport global queue */
|
||||
if( (elan_btl->tport_queue = elan_gallocQueue(base, base->allGroup)) == NULL ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Create and initialize the tport */
|
||||
if( !(elan_btl->tport = elan_tportInit(base->state,
|
||||
elan_btl->global_queue,
|
||||
elan_btl->tport_queue,
|
||||
mca_btl_elan_component.elan_max_posted_recv,
|
||||
base->tport_smallmsg,
|
||||
mca_btl_elan_module.super.btl_rndv_eager_limit,
|
||||
mca_btl_elan_module.super.btl_eager_limit,
|
||||
base->tport_stripemsg,
|
||||
ELAN_POLL_EVENT,
|
||||
base->retryCount,
|
||||
@ -105,11 +105,17 @@ static int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl,
|
||||
}
|
||||
|
||||
/* Create the receive queue */
|
||||
if( (elan_btl->global_queue = elan_gallocQueue(base, base->allGroup)) == NULL ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
mca_btl_elan_component.queue_max_size = elan_queueMaxSlotSize( base->state )
|
||||
- sizeof(mca_btl_elan_hdr_t);
|
||||
|
||||
elan_btl->rx_queue =
|
||||
elan_queueRxInit( base->state, /* ELAN_STATE *state */
|
||||
elan_btl->global_queue, /* ELAN_QUEUE *queue */
|
||||
mca_btl_elan_component.elan_max_posted_recv, /* int nSlots */
|
||||
mca_btl_elan_module.super.btl_eager_limit, /* int slotSize */
|
||||
(int)mca_btl_elan_component.queue_max_size, /* int slotSize */
|
||||
ELAN_RAIL_ALL, /* int rail */
|
||||
(ELAN_TPORT_SHM_DISABLE |
|
||||
ELAN_TPORT_USERCOPY_DISABLE) /* ELAN_FLAGS flags */);
|
||||
@ -152,6 +158,27 @@ static int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl,
|
||||
peers[i] = elan_endpoint;
|
||||
}
|
||||
|
||||
for( i = 0; i < mca_btl_elan_component.elan_max_posted_recv; i++ ) {
|
||||
mca_btl_elan_frag_t* frag;
|
||||
|
||||
MCA_BTL_ELAN_FRAG_ALLOC_EAGER(frag, rc );
|
||||
if( NULL == frag ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
frag->segment.seg_addr.pval = (void*)(frag + 1);
|
||||
frag->base.des_dst = &(frag->segment);
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->type = MCA_BTL_ELAN_HDR_TYPE_RECV;
|
||||
frag->elan_event = elan_tportRxStart( elan_btl->tport,
|
||||
ELAN_TPORT_RXBUF | ELAN_TPORT_RXANY,
|
||||
0, 0, 0, 0,
|
||||
frag->segment.seg_addr.pval,
|
||||
mca_btl_elan_module.super.btl_eager_limit );
|
||||
opal_list_append( &(elan_btl->recv_list), (opal_list_item_t*)frag );
|
||||
}
|
||||
|
||||
/* enable the network */
|
||||
elan_enable_network( elan_btl->base->state );
|
||||
|
||||
@ -191,24 +218,26 @@ mca_btl_elan_alloc( struct mca_btl_base_module_t* btl,
|
||||
size_t size,
|
||||
uint32_t flags )
|
||||
{
|
||||
mca_btl_elan_frag_t* frag;
|
||||
mca_btl_elan_frag_t* frag = NULL;
|
||||
ptrdiff_t hdr_skip = 0;
|
||||
int rc;
|
||||
|
||||
if( size <= btl->btl_eager_limit ) {
|
||||
MCA_BTL_ELAN_FRAG_ALLOC_EAGER(frag, rc);
|
||||
if( OPAL_UNLIKELY(NULL == frag) ) {
|
||||
if( NULL == frag ) {
|
||||
return NULL;
|
||||
}
|
||||
if( size <= mca_btl_elan_component.queue_max_size ) { /* This will be going over the queue */
|
||||
hdr_skip = sizeof(mca_btl_elan_hdr_t);
|
||||
}
|
||||
} else if( size <= btl->btl_max_send_size ) {
|
||||
MCA_BTL_ELAN_FRAG_ALLOC_MAX(frag, rc);
|
||||
}
|
||||
if( OPAL_UNLIKELY(NULL == frag) ) {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_addr.pval = (void*)((char*)(frag + 1)
|
||||
+ sizeof(mca_btl_elan_hdr_t));
|
||||
|
||||
frag->segment.seg_addr.pval = (void*)((char*)(frag + 1) + hdr_skip);
|
||||
frag->segment.seg_len = size;
|
||||
frag->base.des_src = &(frag->segment);
|
||||
frag->base.des_src_cnt = 1;
|
||||
@ -262,49 +291,43 @@ mca_btl_elan_prepare_src( struct mca_btl_base_module_t* btl,
|
||||
mca_btl_elan_frag_t* frag;
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
size_t max_data = *size;
|
||||
size_t max_data = *size, skip = 0;
|
||||
int rc;
|
||||
|
||||
if( OPAL_UNLIKELY(max_data > UINT32_MAX) ) {
|
||||
max_data = (size_t)UINT32_MAX;
|
||||
}
|
||||
if( max_data+reserve <= btl->btl_eager_limit ) {
|
||||
if( 0 != reserve ) {
|
||||
if( max_data + reserve <= btl->btl_eager_limit ) {
|
||||
MCA_BTL_ELAN_FRAG_ALLOC_EAGER(frag, rc);
|
||||
if( NULL == frag ) {
|
||||
return NULL;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (void*)((unsigned char*) frag->segment.seg_addr.pval + reserve);
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_ELAN_FRAG_RETURN(frag);
|
||||
return NULL;
|
||||
if( (max_data + reserve) <= mca_btl_elan_component.queue_max_size ) {
|
||||
skip = sizeof(mca_btl_elan_hdr_t);
|
||||
}
|
||||
frag->segment.seg_addr.pval = frag+1;
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
}
|
||||
else if( max_data+reserve <= btl->btl_max_send_size ) {
|
||||
} else {
|
||||
MCA_BTL_ELAN_FRAG_ALLOC_MAX(frag, rc);
|
||||
if( NULL == frag ) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(max_data + reserve > btl->btl_max_send_size){
|
||||
if( (max_data + reserve) > btl->btl_max_send_size ) {
|
||||
max_data = btl->btl_max_send_size - reserve;
|
||||
}
|
||||
}
|
||||
frag->segment.seg_addr.pval = (void*)((unsigned char*)(frag + 1) + skip);
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
|
||||
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_ELAN_FRAG_RETURN(frag);
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_addr.pval = frag+1;
|
||||
*size = max_data;
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
} else {
|
||||
} else { /* this is a real RDMA operation */
|
||||
MCA_BTL_ELAN_FRAG_ALLOC_USER(frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
@ -317,6 +340,7 @@ mca_btl_elan_prepare_src( struct mca_btl_base_module_t* btl,
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
frag->segment.seg_len = max_data;
|
||||
}
|
||||
|
||||
frag->base.des_src = &(frag->segment);
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.order = MCA_BTL_NO_ORDER;
|
||||
@ -352,6 +376,7 @@ mca_btl_elan_prepare_dst( struct mca_btl_base_module_t* btl,
|
||||
uint32_t flags )
|
||||
{
|
||||
mca_btl_elan_frag_t* frag;
|
||||
size_t origin, position = *size;
|
||||
int rc;
|
||||
|
||||
if( OPAL_UNLIKELY((*size) > UINT32_MAX) ) {
|
||||
@ -361,11 +386,14 @@ mca_btl_elan_prepare_dst( struct mca_btl_base_module_t* btl,
|
||||
if( NULL == frag ) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ompi_convertor_get_current_pointer( convertor, (void**)&(frag->segment.seg_addr.pval) );
|
||||
origin = convertor->bConverted;
|
||||
position += origin;
|
||||
ompi_convertor_set_position( convertor, &position );
|
||||
*size = position - origin;
|
||||
|
||||
frag->segment.seg_len = *size;
|
||||
frag->segment.seg_key.key64 = (uint64_t)(intptr_t)convertor;
|
||||
/*frag->segment.seg_addr.pval = convertor->pBaseBuf + convertor->bConverted;*/
|
||||
frag->type = MCA_BTL_ELAN_HDR_TYPE_PUT;
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
@ -401,6 +429,7 @@ static int mca_btl_elan_send( struct mca_btl_base_module_t* btl,
|
||||
frag->tag = tag;
|
||||
frag->type = MCA_BTL_ELAN_HDR_TYPE_SEND;
|
||||
|
||||
if( frag->segment.seg_len <= mca_btl_elan_component.queue_max_size ) {
|
||||
elan_hdr->tag = (int)tag;
|
||||
elan_hdr->length = (int)frag->segment.seg_len;
|
||||
send_len = frag->segment.seg_len + sizeof(mca_btl_elan_hdr_t);
|
||||
@ -417,7 +446,17 @@ static int mca_btl_elan_send( struct mca_btl_base_module_t* btl,
|
||||
&(frag->base), OMPI_SUCCESS );
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
} else {
|
||||
frag->elan_event = elan_tportTxStart( elan_btl->tport, 0, endpoint->elan_vp,
|
||||
elan_btl->elan_vp, frag->tag,
|
||||
(void*)elan_hdr, frag->segment.seg_len );
|
||||
if( elan_tportTxDone(frag->elan_event) ) {
|
||||
elan_tportTxWait(frag->elan_event);
|
||||
frag->base.des_cbfunc( &(elan_btl->super), frag->endpoint,
|
||||
&(frag->base), OMPI_SUCCESS );
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
/* Add the fragment to the pending send list */
|
||||
opal_list_append( &(elan_btl->send_list), (opal_list_item_t*)frag );
|
||||
return OMPI_SUCCESS;
|
||||
@ -474,11 +513,13 @@ static int mca_btl_elan_get( mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* dst = des->des_dst;
|
||||
unsigned char* src_addr = (unsigned char*)src->seg_addr.pval;
|
||||
size_t src_len = src->seg_len;
|
||||
unsigned char* dst_addr = (unsigned char*)ompi_ptr_ltop(dst->seg_addr.lval);
|
||||
unsigned char* dst_addr = (unsigned char*)dst->seg_addr.lval;
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
frag->btl = elan_btl;
|
||||
frag->type = MCA_BTL_ELAN_HDR_TYPE_GET;
|
||||
opal_output( 0, "elan_get( remote %p, local %p, length %d, peer %d )\n",
|
||||
(void*)src_addr, (void*)dst_addr, (int)src_len, peer );
|
||||
frag->elan_event = elan_get(elan_btl->base->state, src_addr, dst_addr, src_len, peer);
|
||||
/* Add the fragment to the pending RDMA list */
|
||||
opal_list_append( &(elan_btl->rdma_list), (opal_list_item_t*)frag );
|
||||
@ -505,10 +546,13 @@ int mca_btl_elan_finalize( struct mca_btl_base_module_t* btl )
|
||||
}
|
||||
mca_btl_elan_component.elan_num_btls--;
|
||||
|
||||
/* Cancel all posted receives */
|
||||
|
||||
/* Release the internal structures */
|
||||
OBJ_DESTRUCT(&elan_btl->elan_lock);
|
||||
OBJ_DESTRUCT(&elan_btl->recv_list);
|
||||
OBJ_DESTRUCT(&elan_btl->send_list);
|
||||
OBJ_DESTRUCT(&elan_btl->rdma_list);
|
||||
OBJ_DESTRUCT(&elan_btl->elan_lock);
|
||||
/* The BTL is clean, remove it */
|
||||
free(elan_btl);
|
||||
|
||||
|
@ -43,8 +43,8 @@ BEGIN_C_DECLS
|
||||
struct mca_btl_elan_component_t {
|
||||
mca_btl_base_component_1_0_1_t super; /**< base BTL component */
|
||||
|
||||
uint32_t ib_max_btls;
|
||||
/**< maximum number of hcas available to the ELAN component */
|
||||
size_t queue_max_size;
|
||||
/**< maximum amount of data transferred using the queues */
|
||||
|
||||
uint32_t elan_num_btls;
|
||||
/**< number of hcas available to the ELAN component */
|
||||
@ -87,14 +87,18 @@ OMPI_MODULE_DECLSPEC extern mca_btl_elan_component_t mca_btl_elan_component;
|
||||
|
||||
/**
 * Per-module state of the Elan BTL: the base BTL interface followed by the
 * Elan handles (base, tport, queues) and the lists of in-flight fragments.
 */
struct mca_btl_elan_module_t {
|
||||
mca_btl_base_module_t super; /**< base BTL interface */
|
||||
int expect_tport_recv; /**< when non-zero the progress function polls the head of recv_list */
|
||||
int elan_vp; /**< copy of base->state->vp; passed to elan_tportTxStart on sends */
|
||||
ELAN_BASE* base; /**< Elan base handle stored by add_procs */
|
||||
ELAN_TPORT* tport; /**< tport handle returned by elan_tportInit */
|
||||
ELAN_QUEUE* global_queue; /**< The global queue */
|
||||
ELAN_QUEUE* tport_queue; /**< queue from elan_gallocQueue, handed to elan_tportInit */
|
||||
ELAN_QUEUE_RX* rx_queue; /**< The local receive queue */
|
||||
ELAN_QUEUE_TX* tx_queue; /**< The global send queue */
|
||||
opal_mutex_t elan_lock; /**< taken around recv_list updates in the progress function */
|
||||
opal_list_t send_list; /**< list of posted sends */
|
||||
opal_list_t rdma_list; /**< list of pending RDMA operations */
|
||||
opal_list_t recv_list; /**< fragments with a tport receive posted via elan_tportRxStart */
|
||||
};
|
||||
typedef struct mca_btl_elan_module_t mca_btl_elan_module_t;
|
||||
extern mca_btl_elan_module_t mca_btl_elan_module;
|
||||
|
@ -90,28 +90,19 @@ int mca_btl_elan_component_open(void)
|
||||
mca_btl_elan_component.elan_num_btls = 0;
|
||||
mca_btl_elan_component.elan_btls = NULL;
|
||||
|
||||
mca_btl_elan_module.super.btl_exclusivity = 0;
|
||||
mca_btl_elan_module.super.btl_eager_limit = 2*1024 - sizeof(mca_btl_elan_hdr_t);
|
||||
mca_btl_elan_module.super.btl_rndv_eager_limit = 32*1024 - sizeof(mca_btl_elan_hdr_t);
|
||||
mca_btl_elan_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT;
|
||||
mca_btl_elan_module.super.btl_eager_limit = 32*1024;
|
||||
mca_btl_elan_module.super.btl_rndv_eager_limit = mca_btl_elan_module.super.btl_eager_limit;
|
||||
mca_btl_elan_module.super.btl_max_send_size = 64*1024; /*64*1024;*/
|
||||
mca_btl_elan_module.super.btl_rdma_pipeline_send_length = 512 * 1024;
|
||||
mca_btl_elan_module.super.btl_rdma_pipeline_frag_size = 128 * 1024;
|
||||
mca_btl_elan_module.super.btl_min_rdma_pipeline_size = 128 * 1024;
|
||||
mca_btl_elan_module.super.btl_flags = /* MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_GET |*/ MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND;
|
||||
/* mca_btl_elan_module.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE|MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND ;*/
|
||||
mca_btl_elan_module.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND;
|
||||
mca_btl_elan_module.super.btl_bandwidth = 1959;
|
||||
mca_btl_elan_module.super.btl_latency = 4;
|
||||
mca_btl_base_param_register(&mca_btl_elan_component.super.btl_version,
|
||||
&mca_btl_elan_module.super);
|
||||
|
||||
/* register Elan4 component parameters */
|
||||
mca_btl_elan_component.elan_free_list_num =
|
||||
mca_btl_elan_param_register_int ("free_list_num", 8);
|
||||
mca_btl_elan_component.elan_free_list_max =
|
||||
mca_btl_elan_param_register_int ("free_list_max", 128);
|
||||
mca_btl_elan_component.elan_free_list_inc =
|
||||
mca_btl_elan_param_register_int ("free_list_inc", 32);
|
||||
|
||||
mca_base_param_reg_string( (mca_base_component_t*)&mca_btl_elan_component, "elanidmap",
|
||||
"System-wide configuration file for the Quadrics network (elanidmap)",
|
||||
false, false, "/etc/elanidmap", &mca_btl_elan_component.elanidmap_file );
|
||||
@ -121,6 +112,16 @@ int mca_btl_elan_component_open(void)
|
||||
" in performances",
|
||||
false, false, 128, &mca_btl_elan_component.elan_max_posted_recv );
|
||||
|
||||
/* register Elan4 component parameters */
|
||||
mca_btl_elan_component.elan_free_list_num =
|
||||
mca_btl_elan_param_register_int( "free_list_num", 8 );
|
||||
mca_btl_elan_component.elan_free_list_max =
|
||||
mca_btl_elan_param_register_int( "free_list_max",
|
||||
(mca_btl_elan_component.elan_free_list_num +
|
||||
mca_btl_elan_component.elan_max_posted_recv) );
|
||||
mca_btl_elan_component.elan_free_list_inc =
|
||||
mca_btl_elan_param_register_int( "free_list_inc", 32 );
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -238,6 +239,9 @@ mca_btl_elan_component_init( int *num_btl_modules,
|
||||
OBJ_CONSTRUCT( &btl->elan_lock, opal_mutex_t );
|
||||
OBJ_CONSTRUCT( &btl->send_list, opal_list_t );
|
||||
OBJ_CONSTRUCT( &btl->rdma_list, opal_list_t );
|
||||
OBJ_CONSTRUCT( &btl->recv_list, opal_list_t );
|
||||
|
||||
btl->expect_tport_recv = 1;
|
||||
|
||||
mca_btl_elan_component.elan_btls[count++] = btl;
|
||||
}
|
||||
@ -284,6 +288,42 @@ int mca_btl_elan_component_progress( void )
|
||||
elan_queueRxComplete( elan_btl->rx_queue );
|
||||
num_progressed++;
|
||||
}
|
||||
if(elan_btl->expect_tport_recv) { /* There is a pending message on the tport */
|
||||
mca_btl_elan_frag_t* frag = (mca_btl_elan_frag_t*)opal_list_get_first( &(elan_btl->recv_list) );
|
||||
if( elan_done(frag->elan_event, 0) ) {
|
||||
int tag;
|
||||
size_t length;
|
||||
mca_btl_active_message_callback_t* reg;
|
||||
void* recv_buf;
|
||||
recv_buf = (mca_btl_elan_hdr_t*)elan_tportRxWait( frag->elan_event,
|
||||
NULL, &tag, &length );
|
||||
num_progressed++;
|
||||
/*elan_btl->expect_tport_recv--;*/
|
||||
|
||||
OPAL_THREAD_LOCK(&elan_btl->elan_lock);
|
||||
opal_list_remove_first( &(elan_btl->recv_list) );
|
||||
OPAL_THREAD_UNLOCK(&elan_btl->elan_lock);
|
||||
|
||||
frag->base.des_dst->seg_addr.pval = (void*)recv_buf;
|
||||
frag->base.des_dst->seg_len = length;
|
||||
frag->tag = (mca_btl_base_tag_t)tag;
|
||||
reg = mca_btl_base_active_message_trigger + frag->tag;
|
||||
reg->cbfunc( &(elan_btl->super), frag->tag, &(frag->base), reg->cbdata );
|
||||
if( recv_buf != (void*)(frag+1) ) {
|
||||
elan_tportBufFree( elan_btl->tport, recv_buf );
|
||||
frag->base.des_dst->seg_addr.pval = (void*)(frag+1);
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&elan_btl->elan_lock);
|
||||
frag->elan_event = elan_tportRxStart( elan_btl->tport,
|
||||
ELAN_TPORT_RXBUF | ELAN_TPORT_RXANY,
|
||||
0, 0, 0, 0,
|
||||
frag->base.des_dst->seg_addr.pval,
|
||||
mca_btl_elan_module.super.btl_eager_limit );
|
||||
opal_list_append( &(elan_btl->recv_list), (opal_list_item_t*)frag );
|
||||
OPAL_THREAD_UNLOCK(&elan_btl->elan_lock);
|
||||
}
|
||||
}
|
||||
/* If there are any pending sends check their completion */
|
||||
if( !opal_list_is_empty( &(elan_btl->send_list) ) ) {
|
||||
mca_btl_elan_frag_t* frag = (mca_btl_elan_frag_t*)opal_list_get_first( &(elan_btl->send_list) );
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user