8b9e8054fd
than just the PML/BTLs these days. Also clean up the code so that it handles the situation where not all nodes register information for a given node (rather than just spinning until that node sends information, like we do today). Includes r15234 and r15265 from the /tmp/bwb-modex branch. This commit was SVN r15310. The following SVN revisions from the original message are invalid or inconsistent and therefore were not cross-referenced: r15234 r15265
244 строки
8.1 KiB
C
244 строки
8.1 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "opal/class/opal_hash_table.h"
|
|
#include "ompi/runtime/ompi_module_exchange.h"
|
|
|
|
#include "btl_mx.h"
|
|
#include "btl_mx_proc.h"
|
|
|
|
/* Lifecycle hooks for mca_btl_mx_proc_t; both are defined later in this file. */
static void mca_btl_mx_proc_construct( mca_btl_mx_proc_t* proc );
static void mca_btl_mx_proc_destruct( mca_btl_mx_proc_t* proc );

/*
 * Class instance for mca_btl_mx_proc_t. The arguments are, in order: the
 * class itself, opal_list_item_t, and the constructor / destructor pair
 * declared just above.
 */
OBJ_CLASS_INSTANCE( mca_btl_mx_proc_t,
                    opal_list_item_t,
                    mca_btl_mx_proc_construct,
                    mca_btl_mx_proc_destruct );
|
|
|
|
/*
 * Constructor for an MX proc instance.
 *
 * Resets every field to its "empty" value, constructs the per-proc lock and
 * appends the new instance to the component-wide list of MX procs.
 */
void mca_btl_mx_proc_construct(mca_btl_mx_proc_t* proc)
{
    /* no associated ompi proc, no published addresses, no endpoints yet */
    proc->proc_ompi           = NULL;
    proc->proc_endpoints      = NULL;
    proc->proc_endpoint_count = 0;
    proc->mx_peers_count      = 0;
    proc->mx_peers            = NULL;

    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);

    /* the list of all proc instances is only touched under mx_lock */
    OPAL_THREAD_LOCK(&mca_btl_mx_component.mx_lock);
    opal_list_append(&mca_btl_mx_component.mx_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_mx_component.mx_lock);
}
|
|
|
|
/*
|
|
* Cleanup MX proc instance
|
|
*/
|
|
|
|
void mca_btl_mx_proc_destruct(mca_btl_mx_proc_t* proc)
|
|
{
|
|
/* remove from list of all proc instances */
|
|
OPAL_THREAD_LOCK(&mca_btl_mx_component.mx_lock);
|
|
opal_list_remove_item(&mca_btl_mx_component.mx_procs, &proc->super);
|
|
OPAL_THREAD_UNLOCK(&mca_btl_mx_component.mx_lock);
|
|
|
|
/* release resources */
|
|
if( NULL != proc->proc_endpoints ) {
|
|
free(proc->proc_endpoints);
|
|
proc->proc_endpoints = NULL;
|
|
}
|
|
if( NULL != proc->mx_peers ) {
|
|
free(proc->mx_peers);
|
|
proc->mx_peers = NULL;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Look for an existing MX process instances based on the associated
|
|
* ompi_proc_t instance.
|
|
*/
|
|
static mca_btl_mx_proc_t* mca_btl_mx_proc_lookup_ompi(ompi_proc_t* ompi_proc)
|
|
{
|
|
mca_btl_mx_proc_t* mx_proc;
|
|
|
|
OPAL_THREAD_LOCK(&mca_btl_mx_component.mx_lock);
|
|
|
|
for( mx_proc = (mca_btl_mx_proc_t*)opal_list_get_first(&mca_btl_mx_component.mx_procs);
|
|
mx_proc != (mca_btl_mx_proc_t*)opal_list_get_end(&mca_btl_mx_component.mx_procs);
|
|
mx_proc = (mca_btl_mx_proc_t*)opal_list_get_next(mx_proc) ) {
|
|
|
|
if(mx_proc->proc_ompi == ompi_proc) {
|
|
OPAL_THREAD_UNLOCK(&mca_btl_mx_component.mx_lock);
|
|
return mx_proc;
|
|
}
|
|
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_btl_mx_component.mx_lock);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Create a MX process structure. There is a one-to-one correspondence
|
|
* between a ompi_proc_t and a mca_btl_mx_proc_t instance. We cache
|
|
* additional data (specifically the list of mca_btl_mx_endpoint_t instances,
|
|
* and published addresses) associated w/ a given destination on this
|
|
* datastructure.
|
|
*/
|
|
|
|
mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
|
{
|
|
mca_btl_mx_proc_t* module_proc = NULL;
|
|
mca_btl_mx_addr_t *mx_peers;
|
|
int rc, i;
|
|
size_t size;
|
|
|
|
/* Check if we have already created a MX proc
|
|
* structure for this ompi process */
|
|
module_proc = mca_btl_mx_proc_lookup_ompi(ompi_proc);
|
|
if( module_proc != NULL ) {
|
|
/* Gotcha! */
|
|
return module_proc;
|
|
}
|
|
|
|
/* query for the peer address info */
|
|
rc = ompi_modex_recv( &mca_btl_mx_component.super.btl_version,
|
|
ompi_proc, (void*)&mx_peers, &size );
|
|
if( OMPI_SUCCESS != rc ) {
|
|
opal_output( 0, "mca_pml_base_modex_recv failed for peer [%ld,%ld,%ld]",
|
|
ORTE_NAME_ARGS(&ompi_proc->proc_name) );
|
|
return NULL;
|
|
}
|
|
|
|
if( size < sizeof(mca_btl_mx_addr_t) ) { /* no available connection */
|
|
return NULL;
|
|
}
|
|
if( (size % sizeof(mca_btl_mx_addr_t)) != 0 ) {
|
|
opal_output( 0, "invalid mx address for peer [%ld,%ld,%ld]",
|
|
ORTE_NAME_ARGS(&ompi_proc->proc_name) );
|
|
return NULL;
|
|
}
|
|
|
|
|
|
module_proc = OBJ_NEW(mca_btl_mx_proc_t);
|
|
module_proc->proc_ompi = ompi_proc;
|
|
|
|
module_proc->mx_peers_count = size / sizeof(mca_btl_mx_addr_t);
|
|
|
|
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
|
for (i = 0 ; i < module_proc->mx_peers_count ; ++i) {
|
|
BTL_MX_ADDR_NTOH(mx_peers[i]);
|
|
}
|
|
#endif
|
|
module_proc->mx_peers = mx_peers;
|
|
|
|
return module_proc;
|
|
}
|
|
|
|
|
|
/*
|
|
* Note that this routine must be called with the lock on the process
|
|
* already held. Insert a btl instance into the proc array and assign
|
|
* it an address.
|
|
*/
|
|
int mca_btl_mx_proc_insert( mca_btl_mx_proc_t* module_proc,
|
|
mca_btl_mx_endpoint_t* module_endpoint )
|
|
{
|
|
mca_btl_mx_module_t* mx_btl;
|
|
int i, j;
|
|
|
|
/**
|
|
* Check if there is any Myrinet network between myself and the peer
|
|
*/
|
|
for( i = 0; i < mca_btl_mx_component.mx_num_btls; i++ ) {
|
|
mx_btl = mca_btl_mx_component.mx_btls[i];
|
|
|
|
for( j = 0; j < module_proc->mx_peers_count; j++ ) {
|
|
if( mx_btl->mx_unique_network_id == module_proc->mx_peers[j].unique_network_id ) {
|
|
/* There is at least one connection between these two nodes */
|
|
goto create_peer_endpoint;
|
|
}
|
|
}
|
|
}
|
|
module_proc->mx_peers_count = 0;
|
|
/**
|
|
* No Myrinet connectivity. Let the PML layer figure out another
|
|
* way to communicate with the peer.
|
|
*/
|
|
return OMPI_ERROR;
|
|
create_peer_endpoint:
|
|
mx_btl = module_endpoint->endpoint_btl;
|
|
for( j = 0; j < module_proc->mx_peers_count; j++ ) {
|
|
if( mx_btl->mx_unique_network_id == module_proc->mx_peers[j].unique_network_id ) {
|
|
module_endpoint->mx_peer.nic_id = module_proc->mx_peers[j].nic_id;
|
|
module_endpoint->mx_peer.endpoint_id = module_proc->mx_peers[j].endpoint_id;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if( NULL == module_proc->proc_endpoints ) {
|
|
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
|
|
malloc(module_proc->mx_peers_count * sizeof(mca_btl_base_endpoint_t*));
|
|
if( NULL == module_proc->proc_endpoints ) {
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
}
|
|
module_endpoint->endpoint_proc = module_proc;
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/*
 * Establish the MX connection for an endpoint and, on success, record the
 * endpoint in the endpoint array of its proc.
 *
 * The endpoint status is set to MCA_BTL_MX_CONNECTION_PENDING while the
 * attempt is in progress. mx_connect() is retried only when it reports
 * MX_TIMEOUT, at most mx_connection_retries additional times.
 *
 * @param module_endpoint  endpoint whose mx_peer (nic_id / endpoint_id) and
 *                         endpoint_proc have already been filled in
 * @return OMPI_SUCCESS once connected (status MCA_BTL_MX_CONNECTED),
 *         OMPI_ERROR otherwise (status MCA_BTL_MX_NOT_REACHEABLE, and a
 *         diagnostic is emitted through opal_output).
 */
int mca_btl_mx_proc_connect( mca_btl_mx_endpoint_t* module_endpoint )
{
    int num_retry = 0;
    mx_return_t mx_status;
    mx_endpoint_addr_t mx_remote_addr;
    mca_btl_mx_proc_t* module_proc = module_endpoint->endpoint_proc;

    module_endpoint->status = MCA_BTL_MX_CONNECTION_PENDING;

    /* try once, then again for as long as we time out and retries remain */
    do {
        mx_status = mx_connect( module_endpoint->endpoint_btl->mx_endpoint,
                                module_endpoint->mx_peer.nic_id,
                                module_endpoint->mx_peer.endpoint_id,
                                mca_btl_mx_component.mx_filter,
                                mca_btl_mx_component.mx_timeout,
                                &mx_remote_addr );
    } while( (MX_TIMEOUT == mx_status) &&
             (num_retry++ < mca_btl_mx_component.mx_connection_retries) );

    if( MX_SUCCESS != mx_status ) {
        char peer_name[MX_MAX_HOSTNAME_LEN];

        /* fall back to the raw nic id when it cannot be resolved to a name */
        if( MX_SUCCESS != mx_nic_id_to_hostname( module_endpoint->mx_peer.nic_id, peer_name ) ) {
            snprintf( peer_name, sizeof(peer_name), "unknown %llx nic_id",
                      (unsigned long long)module_endpoint->mx_peer.nic_id );
        }

        opal_output( 0, "mx_connect fail for %s with key %x (error %s)\n\tUnique ID (local %x remote %x)\n",
                     peer_name, mca_btl_mx_component.mx_filter, mx_strerror(mx_status),
                     module_endpoint->endpoint_btl->mx_unique_network_id,
                     module_endpoint->mx_peer.unique_network_id );

        module_endpoint->status = MCA_BTL_MX_NOT_REACHEABLE;
        return OMPI_ERROR;
    }

    module_endpoint->mx_peer_addr = mx_remote_addr;
    module_endpoint->status = MCA_BTL_MX_CONNECTED;

    /*
     * NOTE(review): the append below is neither bounds-checked against the
     * size of proc_endpoints nor visibly protected by the proc lock --
     * confirm that callers guarantee both.
     */
    module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint;
    return OMPI_SUCCESS;
}
|