1
1

Improve the performance of the MX BTL. Correct the fake PUT

protocol.

This commit was SVN r17452.
Этот коммит содержится в:
George Bosilca 2008-02-14 04:38:55 +00:00
родитель e7aaf6aa67
Коммит 255cd2186b
5 изменённых файлов: 105 добавлений и 90 удалений

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* Copyright (c) 2004-2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -136,7 +136,8 @@ int mca_btl_mx_register( struct mca_btl_base_module_t* btl,
mx_segment.segment_ptr = (void*)(frag+1);
mx_segment.segment_length = mx_btl->super.btl_eager_limit;
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1, 0x0ULL, 0x0ULL,
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
0x01ULL, BTL_MX_RECV_MASK,
frag, &(frag->mx_request) );
if( MX_SUCCESS != mx_return ) {
opal_output( 0, "mca_btl_mx_register: mx_irecv failed with status %d (%s)\n",
@ -317,7 +318,8 @@ mca_btl_base_descriptor_t* mca_btl_mx_prepare_dst( struct mca_btl_base_module_t*
mx_segment.segment_ptr = frag->segment[0].seg_addr.pval;
mx_segment.segment_length = frag->segment[0].seg_len;
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1, frag->segment[0].seg_key.key64,
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
frag->segment[0].seg_key.key64,
BTL_MX_PUT_MASK, NULL, &(frag->mx_request) );
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
opal_output( 0, "Fail to re-register a fragment with the MX NIC ...\n" );
@ -325,6 +327,17 @@ mca_btl_base_descriptor_t* mca_btl_mx_prepare_dst( struct mca_btl_base_module_t*
return NULL;
}
#ifdef HAVE_MX_FORGET
{
mx_return = mx_forget( mx_btl->mx_endpoint, &(frag->mx_request) );
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
opal_output( 0, "mx_forget failed in mca_btl_mx_prepare_dst with error %d (%s)\n",
mx_return, mx_strerror(mx_return) );
return NULL;
}
}
#endif
/* Allow the fragment to be recycled using the mca_btl_mx_free function */
frag->type = MCA_BTL_MX_SEND;
@ -372,7 +385,8 @@ static int mca_btl_mx_put( struct mca_btl_base_module_t* btl,
mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt,
endpoint->mx_peer_addr,
descriptor->des_dst[0].seg_key.key64, frag, &frag->mx_request );
descriptor->des_dst[0].seg_key.key64, frag,
&frag->mx_request );
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_return) );
return OMPI_ERROR;
@ -400,7 +414,7 @@ int mca_btl_mx_send( struct mca_btl_base_module_t* btl,
mca_btl_mx_frag_t* frag = (mca_btl_mx_frag_t*)descriptor;
mx_segment_t mx_segment[2];
mx_return_t mx_return;
uint64_t total_length = 0;
uint64_t total_length = 0, tag64;
uint32_t i = 0;
if( OPAL_UNLIKELY(MCA_BTL_MX_CONNECTED != ((mca_btl_mx_endpoint_t*)endpoint)->status) ) {
@ -421,8 +435,10 @@ int mca_btl_mx_send( struct mca_btl_base_module_t* btl,
total_length += descriptor->des_src[i].seg_len;
} while (++i < descriptor->des_src_cnt);
mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt, endpoint->mx_peer_addr,
(uint64_t)tag, frag, &frag->mx_request );
tag64 = 0x01ULL | (((uint64_t)tag) << 8);
mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt,
endpoint->mx_peer_addr,
tag64, frag, &frag->mx_request );
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_return) );
return OMPI_ERROR;

Просмотреть файл

@ -42,14 +42,12 @@
#include <mx_extensions.h>
#endif /* HAVE_MX_EXTENSIONS_H */
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
/**
* The mask used for receive and for the PUT protocol
*/
#define BTL_MX_RECV_MASK 0x0000ffffffffffffULL
#define BTL_MX_RECV_MASK 0x00000000000000ffULL
#define BTL_MX_PUT_MASK 0xffffffffffffffffULL
/**
@ -310,7 +308,6 @@ mca_btl_mx_prepare_dst( struct mca_btl_base_module_t* btl,
*/
int mca_btl_mx_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
END_C_DECLS
#endif

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* Copyright (c) 2004-2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -143,9 +143,9 @@ int mca_btl_mx_component_open(void)
false, false, NULL, &mca_btl_mx_component.mx_if_exclude );
mca_btl_mx_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT;
mca_btl_mx_module.super.btl_eager_limit = 4096;
mca_btl_mx_module.super.btl_rndv_eager_limit = 4096;
mca_btl_mx_module.super.btl_max_send_size = 64*1024;
mca_btl_mx_module.super.btl_eager_limit = 1024;
mca_btl_mx_module.super.btl_rndv_eager_limit = 1024;
mca_btl_mx_module.super.btl_max_send_size = 8*1024;
mca_btl_mx_module.super.btl_rdma_pipeline_send_length = 256*1024;
mca_btl_mx_module.super.btl_rdma_pipeline_frag_size = 8*1024*1024;
mca_btl_mx_module.super.btl_min_rdma_pipeline_size = 0;
@ -215,11 +215,10 @@ mca_btl_mx_unexpected_handler( void *context, mx_endpoint_addr_t source,
/*opal_output( 0, "Get unexpected handler context %p source %lld match_value %lld\n"
"\tlength %d data %p\n", context, source.stuff[0], match_value, length,
data_if_available );*/
if( match_value > MCA_BTL_TAG_MAX )
if( !(0x01 & match_value) )
return MX_RECV_CONTINUE;
tag = match_value & 0xff;
assert( tag < 16 );
tag = (match_value >> 8) & 0xff;
reg = mca_btl_base_active_message_trigger + tag;
segment.seg_addr.pval = data_if_available;
@ -437,11 +436,14 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
return NULL;
}
/* Set the MX error handler to always return. This function is the only MX function
* allowed to be called before mx_init, in order to make sure that if MX is not
* up and running the MX library does not exit the application.
/**
* As the MX MTL gets initialized before the MX BTL, it will call
* mx_init, and the environment variables set by the BTL will be useless.
* Closing MX will force the next call to mx_init to take these
* environment variables into account.
*/
mx_set_error_handler(MX_ERRORS_RETURN);
/*(void)ompi_common_mx_finalize();*/
if( 0 == mca_btl_mx_component.mx_support_sharedmem )
opal_setenv( "MX_DISABLE_SHMEM", "1", true, &environ );
if( 0 == mca_btl_mx_component.mx_support_self )
@ -449,6 +451,12 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
/* Force the long pipeline (up to 4Kb fragments) */
opal_setenv( "MX_PIPELINE_LOG", "0", true, &environ );
/* Set the MX error handler to always return. This function is the only MX function
* allowed to be called before mx_init, in order to make sure that if MX is not
* up and running the MX library does not exit the application.
*/
mx_set_error_handler(MX_ERRORS_RETURN);
/* First check if MX is available ... */
if( OMPI_SUCCESS != ompi_common_mx_initialize() ) {
ompi_modex_send(&mca_btl_mx_component.super.btl_version,
@ -618,14 +626,14 @@ int mca_btl_mx_component_progress(void)
*/
frag = mx_status.context;
if( NULL != frag ) {
if( 0xff == frag->type ) { /* it's a send */
if( MCA_BTL_MX_SEND == frag->type ) { /* it's a send */
/* call the completion callback */
frag->base.des_cbfunc( &(mx_btl->super), frag->endpoint,
&(frag->base), OMPI_SUCCESS );
} else if( !mca_btl_mx_component.mx_use_unexpected ) { /* and this one is a receive */
mca_btl_active_message_callback_t* reg;
mx_segment_t mx_segment;
uint8_t tag = mx_status.match_info & 0xff;
uint8_t tag = (mx_status.match_info >> 8) & 0xff;
reg = mca_btl_base_active_message_trigger + tag;
frag->base.des_dst->seg_len = mx_status.msg_length;
@ -638,7 +646,7 @@ int mca_btl_mx_component_progress(void)
mx_segment.segment_ptr = frag->base.des_dst->seg_addr.pval;
mx_segment.segment_length = mca_btl_mx_module.super.btl_eager_limit;
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
0x0ULL, 0x0ULL,
0x01ULL, BTL_MX_RECV_MASK,
frag, &(frag->mx_request) );
if( MX_SUCCESS != mx_return ) {
opal_output( 0, "Fail to re-register a fragment with the MX NIC ... (%s)\n",

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -20,24 +20,20 @@
#define MCA_BTL_MX_FRAG_H
#define MCA_BTL_MX_FRAG_ALIGN (8)
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "ompi/class/ompi_free_list.h"
#include "btl_mx.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#define MCA_BTL_MX_SEND 0x01
#define MCA_BTL_MX_RECV 0x02
/**
BEGIN_C_DECLS
/**
* MX send fragment derived type.
*/
struct mca_btl_mx_frag_t {
struct mca_btl_mx_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment[2];
struct mca_btl_base_endpoint_t* endpoint;
@ -45,51 +41,49 @@ extern "C" {
mx_request_t mx_request;
size_t size;
ompi_free_list_t* mx_frag_list;
};
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_t);
};
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_t);
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_eager_t;
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_eager_t);
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_eager_t);
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_max_t;
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_max_t);
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_max_t);
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_user_t;
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_user_t;
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_user_t);
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_user_t);
/*
/*
* Macros to allocate/return descriptors from module specific
* free list(s).
*/
#define MCA_BTL_MX_FRAG_ALLOC_EAGER(btl, frag, rc) \
{ \
{ \
ompi_free_list_item_t *item; \
OMPI_FREE_LIST_WAIT( &mca_btl_mx_component.mx_send_eager_frags, item, rc); \
frag = (mca_btl_mx_frag_t*) item; \
frag->mx_frag_list = &(mca_btl_mx_component.mx_send_eager_frags); \
frag->segment[0].seg_addr.pval = (void*)(frag+1); \
}
}
#define MCA_BTL_MX_FRAG_ALLOC_USER(btl, frag, rc) \
{ \
{ \
ompi_free_list_item_t *item; \
OMPI_FREE_LIST_WAIT( &mca_btl_mx_component.mx_send_user_frags, item, rc); \
frag = (mca_btl_mx_frag_t*) item; \
frag->mx_frag_list = &(mca_btl_mx_component.mx_send_user_frags); \
}
}
#define MCA_BTL_MX_FRAG_RETURN(btl, frag) \
{ \
/*opal_output( 0, "return item to %p\n", frag->mx_frag_list );*/ \
{ \
OMPI_FREE_LIST_RETURN( frag->mx_frag_list, (ompi_free_list_item_t*)(frag)); \
}
}
END_C_DECLS
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -106,7 +106,7 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
{
mca_btl_mx_proc_t* module_proc = NULL;
mca_btl_mx_addr_t *mx_peers;
int rc, i;
int rc;
size_t size;
/* Check if we have already created a MX proc
@ -142,8 +142,8 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
module_proc->mx_peers_count = size / sizeof(mca_btl_mx_addr_t);
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
for (i = 0 ; i < module_proc->mx_peers_count ; ++i) {
BTL_MX_ADDR_NTOH(mx_peers[i]);
for (rc = 0 ; rc < module_proc->mx_peers_count ; ++rc) {
BTL_MX_ADDR_NTOH(mx_peers[rc]);
}
#endif
module_proc->mx_peers = mx_peers;