A better version allowing for multi-rail configurations or clusters of clusters. A lot of cleanups.
This commit was SVN r14963.
Этот коммит содержится в:
родитель
c66cf32ee2
Коммит
e2dd0a50fc
@ -117,7 +117,7 @@ struct mca_btl_mx_module_t {
|
||||
mca_btl_base_module_t super; /**< base BTL interface */
|
||||
mca_btl_base_recv_reg_t mx_reg[MCA_BTL_TAG_MAX]; /**< the PML registered callbacks */
|
||||
mx_endpoint_t mx_endpoint; /**< local MX endpoint */
|
||||
mx_endpoint_addr_t mx_endpoint_addr; /**< local MX endpoint address */
|
||||
mx_endpoint_addr_t mx_endpoint_addr; /**< local MX endpoint address */
|
||||
uint32_t mx_unique_network_id; /**< unique identifier for this BTL,
|
||||
* based on the MAC address of the
|
||||
* mapper used to route messages.
|
||||
|
@ -130,9 +130,18 @@ int mca_btl_mx_component_open(void)
|
||||
}
|
||||
|
||||
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "max_posted_recv",
|
||||
"Number of received posted in advance. Increasing this number for communication bound application can lead to visible improvement in performances",
|
||||
"Number of received posted in advance. Increasing this number for"
|
||||
" communication bound application can lead to visible improvement"
|
||||
" in performances",
|
||||
false, false, 16, &mca_btl_mx_component.mx_max_posted_recv );
|
||||
|
||||
mca_base_param_reg_string( (mca_base_component_t*)&mca_btl_mx_component, "if_include",
|
||||
"Myrinet card to use (last 6 digits from the mapper MAC)",
|
||||
false, false, NULL, &mca_btl_mx_component.mx_if_include );
|
||||
mca_base_param_reg_string( (mca_base_component_t*)&mca_btl_mx_component, "if_exclude",
|
||||
"Myrinet card to avoid (last 6 digits from the mapper MAC)",
|
||||
false, false, NULL, &mca_btl_mx_component.mx_if_exclude );
|
||||
|
||||
mca_btl_mx_module.super.btl_exclusivity = 50;
|
||||
mca_btl_mx_module.super.btl_eager_limit = 4096;
|
||||
mca_btl_mx_module.super.btl_min_send_size = 4096;
|
||||
@ -166,6 +175,15 @@ int mca_btl_mx_component_close(void)
|
||||
OBJ_DESTRUCT(&mca_btl_mx_component.mx_send_user_frags);
|
||||
OBJ_DESTRUCT(&mca_btl_mx_component.mx_procs);
|
||||
OBJ_DESTRUCT(&mca_btl_mx_component.mx_lock);
|
||||
|
||||
if( NULL != mca_btl_mx_component.mx_if_include ) {
|
||||
free( mca_btl_mx_component.mx_if_include );
|
||||
mca_btl_mx_component.mx_if_include = NULL;
|
||||
}
|
||||
if( NULL != mca_btl_mx_component.mx_if_exclude ) {
|
||||
free( mca_btl_mx_component.mx_if_exclude );
|
||||
mca_btl_mx_component.mx_if_exclude = NULL;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -218,13 +236,58 @@ static mca_btl_mx_module_t* mca_btl_mx_create(uint64_t addr)
|
||||
{
|
||||
mca_btl_mx_module_t* mx_btl;
|
||||
mx_return_t status;
|
||||
uint32_t nic_id;
|
||||
uint32_t nic_id, mx_unique_network_id = 0;
|
||||
char mapper_mac[7], *where;
|
||||
|
||||
status = mx_nic_id_to_board_number( addr, &nic_id );
|
||||
if( MX_SUCCESS != status ) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if MX_HAVE_MAPPER_STATE
|
||||
{
|
||||
mx_return_t ret;
|
||||
mx_endpt_handle_t endp_handle;
|
||||
mx_mapper_state_t ms;
|
||||
|
||||
ret = mx_open_board( nic_id, &endp_handle );
|
||||
if( MX_SUCCESS != ret ) {
|
||||
opal_output( 0, "Unable to open board %d: %s\n", nic_id, mx_strerror(ret) );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ms.board_number = nic_id;
|
||||
ms.iport = 0;
|
||||
ret = mx__get_mapper_state( endp_handle, &ms );
|
||||
if( MX_SUCCESS != ret ) {
|
||||
opal_output( 0, "get_mapper_state failed for board %d: %s\n",
|
||||
nic_id, mx_strerror(ret) );
|
||||
return NULL;
|
||||
}
|
||||
/* Keep the first 4 bytes for the network speed */
|
||||
mx_unique_network_id = ((ms.mapper_mac[3] << 16) +
|
||||
(ms.mapper_mac[4] << 8) +
|
||||
(ms.mapper_mac[5]));
|
||||
|
||||
}
|
||||
#endif /* MX_HAVE_MAPPER_STATE */
|
||||
|
||||
/* Try to figure out if we are allowed to use this network */
|
||||
snprintf( mapper_mac, 7, "%6x", mx_unique_network_id );
|
||||
|
||||
if( (NULL != mca_btl_mx_component.mx_if_exclude) &&
|
||||
(NULL != (where = strstr(mca_btl_mx_component.mx_if_exclude, mapper_mac))) ) {
|
||||
opal_output( 0, "MX network %d connected to the mapper %s has been excluded\n",
|
||||
nic_id, mapper_mac );
|
||||
return NULL;
|
||||
}
|
||||
else if( (NULL != mca_btl_mx_component.mx_if_include) &&
|
||||
(NULL == (where = strstr(mca_btl_mx_component.mx_if_include, mapper_mac))) ) {
|
||||
opal_output( 0, "MX network %d connected to the mapper %s has not been included\n",
|
||||
nic_id, mapper_mac );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mx_btl = malloc(sizeof(mca_btl_mx_module_t));
|
||||
if( NULL == mx_btl ) return NULL;
|
||||
|
||||
@ -243,52 +306,27 @@ static mca_btl_mx_module_t* mca_btl_mx_create(uint64_t addr)
|
||||
mca_btl_mx_finalize( &mx_btl->super );
|
||||
return NULL;
|
||||
}
|
||||
#if MX_HAVE_MAPPER_STATE
|
||||
{
|
||||
mx_return_t ret;
|
||||
mx_endpt_handle_t endp_handle;
|
||||
mx_mapper_state_t ms;
|
||||
|
||||
ret = mx_open_board( nic_id, &endp_handle );
|
||||
if( MX_SUCCESS != ret ) {
|
||||
opal_output( 0, "Unable to open board %d: %s\n", nic_id, mx_strerror(ret) );
|
||||
mca_btl_mx_finalize( &mx_btl->super );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ms.board_number = nic_id;
|
||||
ms.iport = 0;
|
||||
ret = mx__get_mapper_state( endp_handle, &ms );
|
||||
if( MX_SUCCESS != ret ) {
|
||||
opal_output( 0, "get_mapper_state failed for board %d: %s\n",
|
||||
nic_id, mx_strerror(ret) );
|
||||
mca_btl_mx_finalize( &mx_btl->super );
|
||||
return NULL;
|
||||
}
|
||||
/* Keep the first 4 bytes for the network speed */
|
||||
mx_btl->mx_unique_network_id = ((ms.mapper_mac[3] << 16) +
|
||||
(ms.mapper_mac[4] << 8) +
|
||||
(ms.mapper_mac[5]));
|
||||
mx_btl->mx_unique_network_id = mx_unique_network_id;
|
||||
#if defined(MX_HAS_NET_TYPE)
|
||||
{
|
||||
int value;
|
||||
if( (status = mx_get_info( mx_btl->mx_endpoint, MX_LINE_SPEED, NULL, 0,
|
||||
&value, sizeof(int))) != MX_SUCCESS ) {
|
||||
opal_output( 0, "mx_get_info(MX_LINE_SPEED) failed with status %d (%s)\n",
|
||||
status, mx_strerror(status) );
|
||||
}
|
||||
if( MX_SPEED_2G == value ) {
|
||||
mx_btl->mx_unique_network_id |= 0xaa00000000;
|
||||
mx_btl->super.btl_bandwidth = 2000;
|
||||
} else if( MX_SPEED_10G == value ) {
|
||||
mx_btl->mx_unique_network_id |= 0xbb00000000;
|
||||
mx_btl->super.btl_bandwidth = 10000;
|
||||
} else {
|
||||
mx_btl->mx_unique_network_id |= 0xcc00000000;
|
||||
mx_btl->super.btl_bandwidth = 1000; /* some value */
|
||||
}
|
||||
#endif /* defined(MX_HAS_NET_TYPE) */
|
||||
|
||||
}
|
||||
if( MX_SPEED_2G == value ) {
|
||||
mx_btl->mx_unique_network_id |= 0xaa00000000;
|
||||
mx_btl->super.btl_bandwidth = 2000;
|
||||
} else if( MX_SPEED_10G == value ) {
|
||||
mx_btl->mx_unique_network_id |= 0xbb00000000;
|
||||
mx_btl->super.btl_bandwidth = 10000;
|
||||
} else {
|
||||
mx_btl->mx_unique_network_id |= 0xcc00000000;
|
||||
mx_btl->super.btl_bandwidth = 1000; /* whatever */
|
||||
}
|
||||
}
|
||||
#endif /* MX_HAVE_MAPPER_STATE */
|
||||
#endif /* defined(MX_HAS_NET_TYPE) */
|
||||
|
||||
#if 0
|
||||
{
|
||||
@ -476,7 +514,6 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
size = sizeof(mca_btl_mx_addr_t) * mca_btl_mx_component.mx_num_btls;
|
||||
mx_addrs = (mca_btl_mx_addr_t*)calloc( mca_btl_mx_component.mx_num_btls, sizeof(mca_btl_mx_addr_t) );
|
||||
if( NULL == mx_addrs ) {
|
||||
free( nic_addrs );
|
||||
@ -489,29 +526,30 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
|
||||
if( NULL == mx_btl ) {
|
||||
continue;
|
||||
}
|
||||
status = mx_decompose_endpoint_addr( mx_btl->mx_endpoint_addr, &(mx_addrs[i].nic_id),
|
||||
&(mx_addrs[i].endpoint_id) );
|
||||
status = mx_decompose_endpoint_addr( mx_btl->mx_endpoint_addr, &(mx_addrs[count].nic_id),
|
||||
&(mx_addrs[count].endpoint_id) );
|
||||
if( MX_SUCCESS != status ) {
|
||||
mca_btl_mx_finalize( &mx_btl->super );
|
||||
continue;
|
||||
}
|
||||
mx_addrs[i].unique_network_id = mx_btl->mx_unique_network_id;
|
||||
mx_addrs[count].unique_network_id = mx_btl->mx_unique_network_id;
|
||||
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
BTL_MX_ADDR_HTON(mx_addrs[i]);
|
||||
BTL_MX_ADDR_HTON(mx_addrs[count]);
|
||||
#endif
|
||||
mca_btl_mx_component.mx_btls[count++] = mx_btl;
|
||||
mca_btl_mx_component.mx_btls[count] = mx_btl;
|
||||
count++; /* one more successfully initialized MX interface */
|
||||
}
|
||||
mca_btl_mx_component.mx_num_btls = count;
|
||||
*num_btl_modules = count;
|
||||
size = sizeof(mca_btl_mx_addr_t) * count;
|
||||
if( 0 == count ) {
|
||||
/* No active BTL module */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* publish the MX addresses via the MCA framework */
|
||||
mca_pml_base_modex_send( &mca_btl_mx_component.super.btl_version, mx_addrs, size );
|
||||
mca_pml_base_modex_send( &mca_btl_mx_component.super.btl_version, mx_addrs,
|
||||
sizeof(mca_btl_mx_addr_t) * mca_btl_mx_component.mx_num_btls );
|
||||
|
||||
free( nic_addrs );
|
||||
free( mx_addrs );
|
||||
|
@ -105,6 +105,9 @@ static mca_btl_mx_proc_t* mca_btl_mx_proc_lookup_ompi(ompi_proc_t* ompi_proc)
|
||||
mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
{
|
||||
mca_btl_mx_proc_t* module_proc = NULL;
|
||||
mca_btl_mx_addr_t *mx_peers;
|
||||
int rc, i;
|
||||
size_t size;
|
||||
|
||||
/* Check if we have already created a MX proc
|
||||
* structure for this ompi process */
|
||||
@ -114,13 +117,37 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
return module_proc;
|
||||
}
|
||||
|
||||
/* Oops! First time, gotta create a new MX proc
|
||||
* out of the ompi_proc ... */
|
||||
/* query for the peer address info */
|
||||
rc = mca_pml_base_modex_recv( &mca_btl_mx_component.super.btl_version,
|
||||
ompi_proc, (void*)&mx_peers, &size );
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
opal_output( 0, "mca_pml_base_modex_recv failed for peer [%ld,%ld,%ld]",
|
||||
ORTE_NAME_ARGS(&ompi_proc->proc_name) );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if( size < sizeof(mca_btl_mx_addr_t) ) { /* no available connection */
|
||||
return NULL;
|
||||
}
|
||||
if( (size % sizeof(mca_btl_mx_addr_t)) != 0 ) {
|
||||
opal_output( 0, "invalid mx address for peer [%ld,%ld,%ld]",
|
||||
ORTE_NAME_ARGS(&ompi_proc->proc_name) );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
module_proc = OBJ_NEW(mca_btl_mx_proc_t);
|
||||
|
||||
module_proc->proc_ompi = ompi_proc;
|
||||
|
||||
module_proc->mx_peers_count = size / sizeof(mca_btl_mx_addr_t);
|
||||
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
for (i = 0 ; i < module_proc->mx_peers_count ; ++i) {
|
||||
BTL_MX_ADDR_NTOH(mx_peers[i]);
|
||||
}
|
||||
#endif
|
||||
module_proc->mx_peers = mx_peers;
|
||||
|
||||
return module_proc;
|
||||
}
|
||||
|
||||
@ -133,37 +160,8 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
int mca_btl_mx_proc_insert( mca_btl_mx_proc_t* module_proc,
|
||||
mca_btl_mx_endpoint_t* module_endpoint )
|
||||
{
|
||||
mca_btl_mx_addr_t *mx_peers;
|
||||
int rc, i, j;
|
||||
mca_btl_mx_module_t* mx_btl;
|
||||
size_t size;
|
||||
|
||||
/* query for the peer address info */
|
||||
rc = mca_pml_base_modex_recv( &mca_btl_mx_component.super.btl_version,
|
||||
module_proc->proc_ompi, (void*)&mx_peers, &size );
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
opal_output( 0, "mca_pml_base_modex_recv failed for peer [%ld,%ld,%ld]",
|
||||
ORTE_NAME_ARGS(&module_proc->proc_ompi->proc_name) );
|
||||
OBJ_RELEASE(module_proc);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
if( (size % sizeof(mca_btl_mx_addr_t)) != 0 ) {
|
||||
opal_output( 0, "invalid mx address for peer [%ld,%ld,%ld]",
|
||||
ORTE_NAME_ARGS(&module_proc->proc_ompi->proc_name) );
|
||||
OBJ_RELEASE(module_proc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
module_proc->mx_peers_count = size / sizeof(mca_btl_mx_addr_t);
|
||||
if( 0 == module_proc->mx_peers_count ) { /* no available connection */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
for (i = 0 ; i < module_proc->mx_peers_count ; ++i) {
|
||||
BTL_MX_ADDR_NTOH(mx_peers[i]);
|
||||
}
|
||||
#endif
|
||||
int i, j;
|
||||
|
||||
/**
|
||||
* Check if there is any Myrinet network between myself and the peer
|
||||
@ -172,7 +170,7 @@ int mca_btl_mx_proc_insert( mca_btl_mx_proc_t* module_proc,
|
||||
mx_btl = mca_btl_mx_component.mx_btls[i];
|
||||
|
||||
for( j = 0; j < module_proc->mx_peers_count; j++ ) {
|
||||
if( mx_btl->mx_unique_network_id == mx_peers[j].unique_network_id ) {
|
||||
if( mx_btl->mx_unique_network_id == module_proc->mx_peers[j].unique_network_id ) {
|
||||
/* There is at least one connection between these two nodes */
|
||||
goto create_peer_endpoint;
|
||||
}
|
||||
@ -187,15 +185,13 @@ int mca_btl_mx_proc_insert( mca_btl_mx_proc_t* module_proc,
|
||||
create_peer_endpoint:
|
||||
mx_btl = module_endpoint->endpoint_btl;
|
||||
for( j = 0; j < module_proc->mx_peers_count; j++ ) {
|
||||
if( mx_btl->mx_unique_network_id == mx_peers[j].unique_network_id ) {
|
||||
module_endpoint->mx_peer.nic_id = mx_peers[j].nic_id;
|
||||
module_endpoint->mx_peer.endpoint_id = mx_peers[j].endpoint_id;
|
||||
if( mx_btl->mx_unique_network_id == module_proc->mx_peers[j].unique_network_id ) {
|
||||
module_endpoint->mx_peer.nic_id = module_proc->mx_peers[j].nic_id;
|
||||
module_endpoint->mx_peer.endpoint_id = module_proc->mx_peers[j].endpoint_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
module_proc->mx_peers = mx_peers;
|
||||
|
||||
if( NULL == module_proc->proc_endpoints ) {
|
||||
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
|
||||
malloc(module_proc->mx_peers_count * sizeof(mca_btl_base_endpoint_t*));
|
||||
@ -231,8 +227,10 @@ int mca_btl_mx_proc_connect( mca_btl_mx_endpoint_t* module_endpoint )
|
||||
if( MX_SUCCESS != mx_nic_id_to_hostname( module_endpoint->mx_peer.nic_id, peer_name ) )
|
||||
sprintf( peer_name, "unknown %lx nic_id", (long)module_endpoint->mx_peer.nic_id );
|
||||
|
||||
opal_output( 0, "mx_connect fail for %s with key %x (error %s)\n",
|
||||
peer_name, mca_btl_mx_component.mx_filter, mx_strerror(mx_status) );
|
||||
opal_output( 0, "mx_connect fail for %s with key %x (error %s)\n\tUnique ID (local %x remote %x)\n",
|
||||
peer_name, mca_btl_mx_component.mx_filter, mx_strerror(mx_status),
|
||||
module_endpoint->endpoint_btl->mx_unique_network_id,
|
||||
module_endpoint->mx_peer.unique_network_id );
|
||||
}
|
||||
module_endpoint->status = MCA_BTL_MX_NOT_REACHEABLE;
|
||||
return OMPI_ERROR;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user