Improve the performance of the MX BTL. Correct the fake PUT
protocol. This commit was SVN r17452.
Этот коммит содержится в:
родитель
e7aaf6aa67
Коммит
255cd2186b
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -136,7 +136,8 @@ int mca_btl_mx_register( struct mca_btl_base_module_t* btl,
|
||||
|
||||
mx_segment.segment_ptr = (void*)(frag+1);
|
||||
mx_segment.segment_length = mx_btl->super.btl_eager_limit;
|
||||
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1, 0x0ULL, 0x0ULL,
|
||||
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
|
||||
0x01ULL, BTL_MX_RECV_MASK,
|
||||
frag, &(frag->mx_request) );
|
||||
if( MX_SUCCESS != mx_return ) {
|
||||
opal_output( 0, "mca_btl_mx_register: mx_irecv failed with status %d (%s)\n",
|
||||
@ -317,7 +318,8 @@ mca_btl_base_descriptor_t* mca_btl_mx_prepare_dst( struct mca_btl_base_module_t*
|
||||
|
||||
mx_segment.segment_ptr = frag->segment[0].seg_addr.pval;
|
||||
mx_segment.segment_length = frag->segment[0].seg_len;
|
||||
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1, frag->segment[0].seg_key.key64,
|
||||
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
|
||||
frag->segment[0].seg_key.key64,
|
||||
BTL_MX_PUT_MASK, NULL, &(frag->mx_request) );
|
||||
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
|
||||
opal_output( 0, "Fail to re-register a fragment with the MX NIC ...\n" );
|
||||
@ -325,6 +327,17 @@ mca_btl_base_descriptor_t* mca_btl_mx_prepare_dst( struct mca_btl_base_module_t*
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef HAVE_MX_FORGET
|
||||
{
|
||||
mx_return = mx_forget( mx_btl->mx_endpoint, &(frag->mx_request) );
|
||||
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
|
||||
opal_output( 0, "mx_forget failed in mca_btl_mx_prepare_dst with error %d (%s)\n",
|
||||
mx_return, mx_strerror(mx_return) );
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Allow the fragment to be recycled using the mca_btl_mx_free function */
|
||||
frag->type = MCA_BTL_MX_SEND;
|
||||
|
||||
@ -372,7 +385,8 @@ static int mca_btl_mx_put( struct mca_btl_base_module_t* btl,
|
||||
|
||||
mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt,
|
||||
endpoint->mx_peer_addr,
|
||||
descriptor->des_dst[0].seg_key.key64, frag, &frag->mx_request );
|
||||
descriptor->des_dst[0].seg_key.key64, frag,
|
||||
&frag->mx_request );
|
||||
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
|
||||
opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_return) );
|
||||
return OMPI_ERROR;
|
||||
@ -400,7 +414,7 @@ int mca_btl_mx_send( struct mca_btl_base_module_t* btl,
|
||||
mca_btl_mx_frag_t* frag = (mca_btl_mx_frag_t*)descriptor;
|
||||
mx_segment_t mx_segment[2];
|
||||
mx_return_t mx_return;
|
||||
uint64_t total_length = 0;
|
||||
uint64_t total_length = 0, tag64;
|
||||
uint32_t i = 0;
|
||||
|
||||
if( OPAL_UNLIKELY(MCA_BTL_MX_CONNECTED != ((mca_btl_mx_endpoint_t*)endpoint)->status) ) {
|
||||
@ -421,8 +435,10 @@ int mca_btl_mx_send( struct mca_btl_base_module_t* btl,
|
||||
total_length += descriptor->des_src[i].seg_len;
|
||||
} while (++i < descriptor->des_src_cnt);
|
||||
|
||||
mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt, endpoint->mx_peer_addr,
|
||||
(uint64_t)tag, frag, &frag->mx_request );
|
||||
tag64 = 0x01ULL | (((uint64_t)tag) << 8);
|
||||
mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt,
|
||||
endpoint->mx_peer_addr,
|
||||
tag64, frag, &frag->mx_request );
|
||||
if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
|
||||
opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_return) );
|
||||
return OMPI_ERROR;
|
||||
|
@ -42,14 +42,12 @@
|
||||
#include <mx_extensions.h>
|
||||
#endif /* HAVE_MX_EXTENSIONS_H */
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* The mask used for receive and for the PUT protocol
|
||||
*/
|
||||
#define BTL_MX_RECV_MASK 0x0000ffffffffffffULL
|
||||
#define BTL_MX_RECV_MASK 0x00000000000000ffULL
|
||||
#define BTL_MX_PUT_MASK 0xffffffffffffffffULL
|
||||
|
||||
/**
|
||||
@ -310,7 +308,6 @@ mca_btl_mx_prepare_dst( struct mca_btl_base_module_t* btl,
|
||||
*/
|
||||
int mca_btl_mx_ft_event(int state);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -143,9 +143,9 @@ int mca_btl_mx_component_open(void)
|
||||
false, false, NULL, &mca_btl_mx_component.mx_if_exclude );
|
||||
|
||||
mca_btl_mx_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT;
|
||||
mca_btl_mx_module.super.btl_eager_limit = 4096;
|
||||
mca_btl_mx_module.super.btl_rndv_eager_limit = 4096;
|
||||
mca_btl_mx_module.super.btl_max_send_size = 64*1024;
|
||||
mca_btl_mx_module.super.btl_eager_limit = 1024;
|
||||
mca_btl_mx_module.super.btl_rndv_eager_limit = 1024;
|
||||
mca_btl_mx_module.super.btl_max_send_size = 8*1024;
|
||||
mca_btl_mx_module.super.btl_rdma_pipeline_send_length = 256*1024;
|
||||
mca_btl_mx_module.super.btl_rdma_pipeline_frag_size = 8*1024*1024;
|
||||
mca_btl_mx_module.super.btl_min_rdma_pipeline_size = 0;
|
||||
@ -215,11 +215,10 @@ mca_btl_mx_unexpected_handler( void *context, mx_endpoint_addr_t source,
|
||||
/*opal_output( 0, "Get unexpected handler context %p source %lld match_value %lld\n"
|
||||
"\tlength %d data %p\n", context, source.stuff[0], match_value, length,
|
||||
data_if_available );*/
|
||||
if( match_value > MCA_BTL_TAG_MAX )
|
||||
if( !(0x01 & match_value) )
|
||||
return MX_RECV_CONTINUE;
|
||||
|
||||
tag = match_value & 0xff;
|
||||
assert( tag < 16 );
|
||||
tag = (match_value >> 8) & 0xff;
|
||||
reg = mca_btl_base_active_message_trigger + tag;
|
||||
|
||||
segment.seg_addr.pval = data_if_available;
|
||||
@ -437,11 +436,14 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* set the MX error handler to always return. This function is the only MX function
|
||||
* allowed to be called before mx_init in order to make sure that if the MX is not
|
||||
* up and running the MX library does not exit the application.
|
||||
/**
|
||||
* As the MX MTL gets initialized before the MX BTL it will call the
|
||||
* mx_init and the environment variables set by the BTL will be useless.
|
||||
* Closing the MX will force the next call to mx_init to take these
|
||||
* environment variables into account.
|
||||
*/
|
||||
mx_set_error_handler(MX_ERRORS_RETURN);
|
||||
/*(void)ompi_common_mx_finalize();*/
|
||||
|
||||
if( 0 == mca_btl_mx_component.mx_support_sharedmem )
|
||||
opal_setenv( "MX_DISABLE_SHMEM", "1", true, &environ );
|
||||
if( 0 == mca_btl_mx_component.mx_support_self )
|
||||
@ -449,6 +451,12 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
|
||||
/* Force the long pipeline (up to 4Kb fragments) */
|
||||
opal_setenv( "MX_PIPELINE_LOG", "0", true, &environ );
|
||||
|
||||
/* set the MX error handler to always return. This function is the only MX function
|
||||
* allowed to be called before mx_init in order to make sure that if the MX is not
|
||||
* up and running the MX library does not exit the application.
|
||||
*/
|
||||
mx_set_error_handler(MX_ERRORS_RETURN);
|
||||
|
||||
/* First check if MX is available ... */
|
||||
if( OMPI_SUCCESS != ompi_common_mx_initialize() ) {
|
||||
ompi_modex_send(&mca_btl_mx_component.super.btl_version,
|
||||
@ -618,15 +626,15 @@ int mca_btl_mx_component_progress(void)
|
||||
*/
|
||||
frag = mx_status.context;
|
||||
if( NULL != frag ) {
|
||||
if( 0xff == frag->type ) { /* it's a send */
|
||||
if( MCA_BTL_MX_SEND == frag->type ) { /* it's a send */
|
||||
/* call the completion callback */
|
||||
frag->base.des_cbfunc( &(mx_btl->super), frag->endpoint,
|
||||
&(frag->base), OMPI_SUCCESS );
|
||||
} else if( !mca_btl_mx_component.mx_use_unexpected ) { /* and this one is a receive */
|
||||
mca_btl_active_message_callback_t* reg;
|
||||
mx_segment_t mx_segment;
|
||||
uint8_t tag = mx_status.match_info & 0xff;
|
||||
|
||||
uint8_t tag = (mx_status.match_info >> 8) & 0xff;
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + tag;
|
||||
frag->base.des_dst->seg_len = mx_status.msg_length;
|
||||
reg->cbfunc( &(mx_btl->super), tag, &(frag->base), reg->cbdata );
|
||||
@ -638,7 +646,7 @@ int mca_btl_mx_component_progress(void)
|
||||
mx_segment.segment_ptr = frag->base.des_dst->seg_addr.pval;
|
||||
mx_segment.segment_length = mca_btl_mx_module.super.btl_eager_limit;
|
||||
mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
|
||||
0x0ULL, 0x0ULL,
|
||||
0x01ULL, BTL_MX_RECV_MASK,
|
||||
frag, &(frag->mx_request) );
|
||||
if( MX_SUCCESS != mx_return ) {
|
||||
opal_output( 0, "Fail to re-register a fragment with the MX NIC ... (%s)\n",
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -20,76 +20,70 @@
|
||||
#define MCA_BTL_MX_FRAG_H
|
||||
|
||||
|
||||
#define MCA_BTL_MX_FRAG_ALIGN (8)
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "btl_mx.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MCA_BTL_MX_SEND 0x01
|
||||
#define MCA_BTL_MX_RECV 0x02
|
||||
|
||||
/**
|
||||
* MX send fragment derived type.
|
||||
*/
|
||||
struct mca_btl_mx_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
mca_btl_base_segment_t segment[2];
|
||||
struct mca_btl_base_endpoint_t* endpoint;
|
||||
uint8_t type;
|
||||
mx_request_t mx_request;
|
||||
size_t size;
|
||||
ompi_free_list_t* mx_frag_list;
|
||||
};
|
||||
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_t);
|
||||
|
||||
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_eager_t;
|
||||
BEGIN_C_DECLS
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_eager_t);
|
||||
|
||||
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_max_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_max_t);
|
||||
/**
|
||||
* MX send fragment derived type.
|
||||
*/
|
||||
struct mca_btl_mx_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
mca_btl_base_segment_t segment[2];
|
||||
struct mca_btl_base_endpoint_t* endpoint;
|
||||
uint8_t type;
|
||||
mx_request_t mx_request;
|
||||
size_t size;
|
||||
ompi_free_list_t* mx_frag_list;
|
||||
};
|
||||
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_t);
|
||||
|
||||
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_user_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_user_t);
|
||||
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_eager_t;
|
||||
|
||||
/*
|
||||
* Macros to allocate/return descriptors from module specific
|
||||
* free list(s).
|
||||
*/
|
||||
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_eager_t);
|
||||
|
||||
#define MCA_BTL_MX_FRAG_ALLOC_EAGER(btl, frag, rc) \
|
||||
{ \
|
||||
ompi_free_list_item_t *item; \
|
||||
OMPI_FREE_LIST_WAIT( &mca_btl_mx_component.mx_send_eager_frags, item, rc); \
|
||||
frag = (mca_btl_mx_frag_t*) item; \
|
||||
frag->mx_frag_list = &(mca_btl_mx_component.mx_send_eager_frags); \
|
||||
frag->segment[0].seg_addr.pval = (void*)(frag+1); \
|
||||
}
|
||||
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_max_t;
|
||||
|
||||
#define MCA_BTL_MX_FRAG_ALLOC_USER(btl, frag, rc) \
|
||||
{ \
|
||||
ompi_free_list_item_t *item; \
|
||||
OMPI_FREE_LIST_WAIT( &mca_btl_mx_component.mx_send_user_frags, item, rc); \
|
||||
frag = (mca_btl_mx_frag_t*) item; \
|
||||
frag->mx_frag_list = &(mca_btl_mx_component.mx_send_user_frags); \
|
||||
}
|
||||
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_max_t);
|
||||
|
||||
#define MCA_BTL_MX_FRAG_RETURN(btl, frag) \
|
||||
{ \
|
||||
/*opal_output( 0, "return item to %p\n", frag->mx_frag_list );*/ \
|
||||
OMPI_FREE_LIST_RETURN( frag->mx_frag_list, (ompi_free_list_item_t*)(frag)); \
|
||||
}
|
||||
typedef struct mca_btl_mx_frag_t mca_btl_mx_frag_user_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_mx_frag_user_t);
|
||||
|
||||
/*
|
||||
* Macros to allocate/return descriptors from module specific
|
||||
* free list(s).
|
||||
*/
|
||||
|
||||
#define MCA_BTL_MX_FRAG_ALLOC_EAGER(btl, frag, rc) \
|
||||
{ \
|
||||
ompi_free_list_item_t *item; \
|
||||
OMPI_FREE_LIST_WAIT( &mca_btl_mx_component.mx_send_eager_frags, item, rc); \
|
||||
frag = (mca_btl_mx_frag_t*) item; \
|
||||
frag->mx_frag_list = &(mca_btl_mx_component.mx_send_eager_frags); \
|
||||
frag->segment[0].seg_addr.pval = (void*)(frag+1); \
|
||||
}
|
||||
|
||||
#define MCA_BTL_MX_FRAG_ALLOC_USER(btl, frag, rc) \
|
||||
{ \
|
||||
ompi_free_list_item_t *item; \
|
||||
OMPI_FREE_LIST_WAIT( &mca_btl_mx_component.mx_send_user_frags, item, rc); \
|
||||
frag = (mca_btl_mx_frag_t*) item; \
|
||||
frag->mx_frag_list = &(mca_btl_mx_component.mx_send_user_frags); \
|
||||
}
|
||||
|
||||
#define MCA_BTL_MX_FRAG_RETURN(btl, frag) \
|
||||
{ \
|
||||
OMPI_FREE_LIST_RETURN( frag->mx_frag_list, (ompi_free_list_item_t*)(frag)); \
|
||||
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
@ -106,7 +106,7 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
{
|
||||
mca_btl_mx_proc_t* module_proc = NULL;
|
||||
mca_btl_mx_addr_t *mx_peers;
|
||||
int rc, i;
|
||||
int rc;
|
||||
size_t size;
|
||||
|
||||
/* Check if we have already created a MX proc
|
||||
@ -142,8 +142,8 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
module_proc->mx_peers_count = size / sizeof(mca_btl_mx_addr_t);
|
||||
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
for (i = 0 ; i < module_proc->mx_peers_count ; ++i) {
|
||||
BTL_MX_ADDR_NTOH(mx_peers[i]);
|
||||
for (rc = 0 ; rc < module_proc->mx_peers_count ; ++rc) {
|
||||
BTL_MX_ADDR_NTOH(mx_peers[rc]);
|
||||
}
|
||||
#endif
|
||||
module_proc->mx_peers = mx_peers;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user