3078be40aa
but are not that bad either. On a 2 procs Intel(R) Xeon(TM) CPU 3.20GHz with MYRICOM Inc. Myrinet 2000 Scalable Cluster Interconnect (rev 04) I get: 0: 1 bytes 13096 times --> 1.10 Mbps in 6.94 usec 1: 2 bytes 14408 times --> 2.17 Mbps in 7.02 usec 2: 3 bytes 14243 times --> 3.24 Mbps in 7.07 usec 3: 4 bytes 9428 times --> 4.27 Mbps in 7.15 usec 4: 6 bytes 10493 times --> 6.26 Mbps in 7.32 usec 5: 8 bytes 6834 times --> 8.18 Mbps in 7.47 usec 6: 12 bytes 8371 times --> 11.89 Mbps in 7.70 usec 7: 13 bytes 5411 times --> 12.72 Mbps in 7.80 usec 8: 16 bytes 5919 times --> 15.35 Mbps in 7.95 usec 9: 19 bytes 7074 times --> 17.66 Mbps in 8.21 usec 10: 21 bytes 7696 times --> 19.00 Mbps in 8.43 usec 11: 24 bytes 7906 times --> 20.87 Mbps in 8.77 usec 12: 27 bytes 8073 times --> 23.05 Mbps in 8.94 usec 13: 29 bytes 4972 times --> 24.32 Mbps in 9.10 usec 14: 32 bytes 5307 times --> 26.29 Mbps in 9.29 usec 15: 35 bytes 5720 times --> 33.61 Mbps in 7.95 usec 16: 45 bytes 7191 times --> 39.50 Mbps in 8.69 usec 17: 48 bytes 7670 times --> 41.33 Mbps in 8.86 usec 18: 51 bytes 7759 times --> 42.80 Mbps in 9.09 usec 19: 61 bytes 4313 times --> 47.44 Mbps in 9.81 usec 20: 64 bytes 5012 times --> 57.61 Mbps in 8.48 usec 21: 67 bytes 6083 times --> 59.31 Mbps in 8.62 usec 22: 93 bytes 6234 times --> 68.08 Mbps in 10.42 usec 23: 96 bytes 6396 times --> 80.65 Mbps in 9.08 usec 24: 99 bytes 7455 times --> 81.56 Mbps in 9.26 usec 25: 125 bytes 3926 times --> 112.46 Mbps in 8.48 usec 26: 128 bytes 5848 times --> 116.87 Mbps in 8.36 usec 27: 131 bytes 6077 times --> 119.22 Mbps in 8.38 usec 28: 189 bytes 6192 times --> 163.79 Mbps in 8.80 usec 29: 192 bytes 7572 times --> 168.01 Mbps in 8.72 usec 30: 195 bytes 7705 times --> 171.13 Mbps in 8.69 usec 31: 253 bytes 4011 times --> 210.21 Mbps in 9.18 usec 32: 256 bytes 5423 times --> 214.55 Mbps in 9.10 usec 33: 259 bytes 5535 times --> 217.64 Mbps in 9.08 usec 34: 381 bytes 5613 times --> 290.55 Mbps in 10.00 usec 35: 384 bytes 6663 times 
--> 296.11 Mbps in 9.89 usec 36: 387 bytes 6764 times --> 298.74 Mbps in 9.88 usec 37: 509 bytes 3451 times --> 353.78 Mbps in 10.98 usec 38: 512 bytes 4546 times --> 359.36 Mbps in 10.87 usec 39: 515 bytes 4617 times --> 361.53 Mbps in 10.87 usec 40: 765 bytes 4645 times --> 461.41 Mbps in 12.65 usec 41: 768 bytes 5270 times --> 468.59 Mbps in 12.50 usec 42: 771 bytes 5341 times --> 470.16 Mbps in 12.51 usec 43: 1021 bytes 2695 times --> 508.42 Mbps in 15.32 usec 44: 1024 bytes 3260 times --> 514.44 Mbps in 15.19 usec 45: 1027 bytes 3298 times --> 515.72 Mbps in 15.19 usec 46: 1533 bytes 3307 times --> 707.12 Mbps in 16.54 usec 47: 1536 bytes 4030 times --> 714.93 Mbps in 16.39 usec 48: 1539 bytes 4071 times --> 714.41 Mbps in 16.44 usec 49: 2045 bytes 2040 times --> 761.38 Mbps in 20.49 usec 50: 2048 bytes 2438 times --> 769.78 Mbps in 20.30 usec 51: 2051 bytes 2465 times --> 769.78 Mbps in 20.33 usec 52: 3069 bytes 2465 times --> 923.43 Mbps in 25.36 usec 53: 3072 bytes 2629 times --> 928.48 Mbps in 25.24 usec 54: 3075 bytes 2642 times --> 929.07 Mbps in 25.25 usec 55: 4093 bytes 1323 times --> 1012.38 Mbps in 30.85 usec 56: 4096 bytes 1620 times --> 1016.69 Mbps in 30.74 usec 57: 4099 bytes 1627 times --> 1015.16 Mbps in 30.81 usec 58: 6141 bytes 1625 times --> 1171.82 Mbps in 39.98 usec 59: 6144 bytes 1667 times --> 1173.85 Mbps in 39.93 usec 60: 6147 bytes 1669 times --> 1174.44 Mbps in 39.93 usec 61: 8189 bytes 835 times --> 1232.43 Mbps in 50.69 usec 62: 8192 bytes 986 times --> 1234.87 Mbps in 50.61 usec 63: 8195 bytes 988 times --> 1234.85 Mbps in 50.63 usec 64: 12285 bytes 988 times --> 1360.73 Mbps in 68.88 usec 65: 12288 bytes 967 times --> 1364.20 Mbps in 68.72 usec 66: 12291 bytes 970 times --> 1364.56 Mbps in 68.72 usec 67: 16381 bytes 485 times --> 1385.48 Mbps in 90.21 usec 68: 16384 bytes 554 times --> 1388.76 Mbps in 90.01 usec 69: 16387 bytes 555 times --> 1388.41 Mbps in 90.05 usec 70: 24573 bytes 555 times --> 1499.72 Mbps in 125.01 usec 71: 
24576 bytes 533 times --> 1499.36 Mbps in 125.05 usec 72: 24579 bytes 533 times --> 1500.44 Mbps in 124.98 usec 73: 32765 bytes 266 times --> 1499.31 Mbps in 166.73 usec 74: 32768 bytes 299 times --> 1497.10 Mbps in 166.99 usec 75: 32771 bytes 299 times --> 1495.29 Mbps in 167.21 usec 76: 49149 bytes 299 times --> 1528.78 Mbps in 245.28 usec 77: 49152 bytes 271 times --> 1527.97 Mbps in 245.42 usec 78: 49155 bytes 271 times --> 1529.35 Mbps in 245.22 usec 79: 65533 bytes 135 times --> 1586.19 Mbps in 315.21 usec 80: 65536 bytes 158 times --> 1591.11 Mbps in 314.25 usec 81: 65539 bytes 159 times --> 1586.50 Mbps in 315.17 usec 82: 98301 bytes 158 times --> 1668.05 Mbps in 449.61 usec 83: 98304 bytes 148 times --> 1667.40 Mbps in 449.80 usec 84: 98307 bytes 148 times --> 1667.29 Mbps in 449.84 usec 85: 131069 bytes 74 times --> 1709.11 Mbps in 585.09 usec 86: 131072 bytes 85 times --> 1711.09 Mbps in 584.42 usec 87: 131075 bytes 85 times --> 1710.92 Mbps in 584.49 usec 88: 196605 bytes 85 times --> 1727.93 Mbps in 868.08 usec 89: 196608 bytes 76 times --> 1726.28 Mbps in 868.92 usec 90: 196611 bytes 76 times --> 1727.06 Mbps in 868.54 usec 91: 262141 bytes 38 times --> 1757.65 Mbps in 1137.87 usec 92: 262144 bytes 43 times --> 1758.69 Mbps in 1137.21 usec 93: 262147 bytes 43 times --> 1759.38 Mbps in 1136.78 usec 94: 393213 bytes 43 times --> 1801.51 Mbps in 1665.25 usec 95: 393216 bytes 40 times --> 1803.26 Mbps in 1663.65 usec 96: 393219 bytes 40 times --> 1800.73 Mbps in 1666.00 usec 97: 524285 bytes 20 times --> 1805.33 Mbps in 2215.65 usec 98: 524288 bytes 22 times --> 1806.80 Mbps in 2213.86 usec 99: 524291 bytes 22 times --> 1805.77 Mbps in 2215.14 usec 100: 786429 bytes 22 times --> 1827.24 Mbps in 3283.64 usec 101: 786432 bytes 20 times --> 1827.03 Mbps in 3284.03 usec 102: 786435 bytes 20 times --> 1827.20 Mbps in 3283.73 usec 103: 1048573 bytes 10 times --> 1840.05 Mbps in 4347.71 usec 104: 1048576 bytes 11 times --> 1839.68 Mbps in 4348.58 usec 105: 
1048579 bytes 11 times --> 1840.13 Mbps in 4347.54 usec 106: 1572861 bytes 11 times --> 1853.99 Mbps in 6472.50 usec 107: 1572864 bytes 10 times --> 1854.11 Mbps in 6472.10 usec 108: 1572867 bytes 10 times --> 1854.12 Mbps in 6472.10 usec 109: 2097149 bytes 5 times --> 1861.41 Mbps in 8595.61 usec 110: 2097152 bytes 5 times --> 1861.25 Mbps in 8596.40 usec 111: 2097155 bytes 5 times --> 1860.99 Mbps in 8597.59 usec 112: 3145725 bytes 5 times --> 1868.34 Mbps in 12845.59 usec 113: 3145728 bytes 5 times --> 1868.30 Mbps in 12845.90 usec 114: 3145731 bytes 5 times --> 1868.59 Mbps in 12843.89 usec 115: 4194301 bytes 3 times --> 1872.16 Mbps in 17092.51 usec 116: 4194304 bytes 3 times --> 1872.31 Mbps in 17091.19 usec 117: 4194307 bytes 3 times --> 1872.13 Mbps in 17092.82 usec 118: 6291453 bytes 3 times --> 1875.88 Mbps in 25588.00 usec 119: 6291456 bytes 3 times --> 1875.98 Mbps in 25586.68 usec 120: 6291459 bytes 3 times --> 1875.93 Mbps in 25587.36 usec 121: 8388605 bytes 3 times --> 1877.79 Mbps in 34082.69 usec 122: 8388608 bytes 3 times --> 1877.72 Mbps in 34083.84 usec 123: 8388611 bytes 3 times --> 1877.66 Mbps in 34085.00 usec This commit was SVN r7180.
194 строки
6.2 KiB
C
194 строки
6.2 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "class/opal_hash_table.h"
|
|
#include "mca/pml/base/pml_base_module_exchange.h"
|
|
|
|
#include "btl_mx.h"
|
|
#include "btl_mx_proc.h"
|
|
|
|
/*
 * Lifecycle hooks for mca_btl_mx_proc_t. Both are file-local and are only
 * referenced through the class instance declared below.
 */
static void mca_btl_mx_proc_construct( mca_btl_mx_proc_t* proc );
static void mca_btl_mx_proc_destruct( mca_btl_mx_proc_t* proc );

/*
 * Class descriptor for mca_btl_mx_proc_t: opal_list_item_t is the parent
 * class, followed by the constructor and the destructor.
 */
OBJ_CLASS_INSTANCE( mca_btl_mx_proc_t,
                    opal_list_item_t,
                    mca_btl_mx_proc_construct,
                    mca_btl_mx_proc_destruct );
|
|
|
|
/*
 * Constructor for an MX proc instance: resets every field to its empty
 * state, builds the per-proc lock, and registers the new instance on the
 * component-wide list of procs.
 */
void mca_btl_mx_proc_construct(mca_btl_mx_proc_t* proc)
{
    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);

    /* no peer, no endpoints and no address selected yet */
    proc->proc_ompi           = NULL;
    proc->proc_endpoints      = NULL;
    proc->proc_endpoint_count = 0;
    proc->proc_addr_index     = 0;

    /* publish the instance on the component list (guarded by the component lock) */
    OPAL_THREAD_LOCK(&mca_btl_mx_component.mx_lock);
    opal_list_append(&mca_btl_mx_component.mx_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_mx_component.mx_lock);
}
|
|
|
|
/*
|
|
* Cleanup MX proc instance
|
|
*/
|
|
|
|
void mca_btl_mx_proc_destruct(mca_btl_mx_proc_t* proc)
|
|
{
|
|
/* remove from list of all proc instances */
|
|
OPAL_THREAD_LOCK(&mca_btl_mx_component.mx_lock);
|
|
opal_list_remove_item(&mca_btl_mx_component.mx_procs, &proc->super);
|
|
OPAL_THREAD_UNLOCK(&mca_btl_mx_component.mx_lock);
|
|
|
|
/* release resources */
|
|
if(NULL != proc->proc_endpoints) {
|
|
free(proc->proc_endpoints);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Look for an existing MX process instances based on the associated
|
|
* ompi_proc_t instance.
|
|
*/
|
|
static mca_btl_mx_proc_t* mca_btl_mx_proc_lookup_ompi(ompi_proc_t* ompi_proc)
|
|
{
|
|
mca_btl_mx_proc_t* mx_proc;
|
|
|
|
OPAL_THREAD_LOCK(&mca_btl_mx_component.mx_lock);
|
|
|
|
for(mx_proc = (mca_btl_mx_proc_t*)
|
|
opal_list_get_first(&mca_btl_mx_component.mx_procs);
|
|
mx_proc != (mca_btl_mx_proc_t*)
|
|
opal_list_get_end(&mca_btl_mx_component.mx_procs);
|
|
mx_proc = (mca_btl_mx_proc_t*)opal_list_get_next(mx_proc)) {
|
|
|
|
if(mx_proc->proc_ompi == ompi_proc) {
|
|
OPAL_THREAD_UNLOCK(&mca_btl_mx_component.mx_lock);
|
|
return mx_proc;
|
|
}
|
|
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_btl_mx_component.mx_lock);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Create a MX process structure. There is a one-to-one correspondence
|
|
* between a ompi_proc_t and a mca_btl_mx_proc_t instance. We cache
|
|
* additional data (specifically the list of mca_btl_mx_endpoint_t instances,
|
|
* and published addresses) associated w/ a given destination on this
|
|
* datastructure.
|
|
*/
|
|
|
|
mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
|
{
|
|
mca_btl_mx_proc_t* module_proc = NULL;
|
|
|
|
/* Check if we have already created a MX proc
|
|
* structure for this ompi process */
|
|
module_proc = mca_btl_mx_proc_lookup_ompi(ompi_proc);
|
|
if( module_proc != NULL ) {
|
|
/* Gotcha! */
|
|
return module_proc;
|
|
}
|
|
|
|
/* Oops! First time, gotta create a new MX proc
|
|
* out of the ompi_proc ... */
|
|
|
|
module_proc = OBJ_NEW(mca_btl_mx_proc_t);
|
|
|
|
module_proc->proc_ompi = ompi_proc;
|
|
|
|
return module_proc;
|
|
}
|
|
|
|
|
|
/*
|
|
* Note that this routine must be called with the lock on the process
|
|
* already held. Insert a btl instance into the proc array and assign
|
|
* it an address.
|
|
*/
|
|
int mca_btl_mx_proc_insert( mca_btl_mx_proc_t* module_proc,
|
|
mca_btl_mx_endpoint_t* module_endpoint )
|
|
{
|
|
mx_return_t mx_status;
|
|
mx_endpoint_addr_t mx_remote_addr;
|
|
mca_btl_mx_addr_t *mx_peers;
|
|
int num_retry = 0, rc, count, i;
|
|
size_t size;
|
|
|
|
/* query for the peer address info */
|
|
rc = mca_pml_base_modex_recv( &mca_btl_mx_component.super.btl_version,
|
|
module_proc->proc_ompi, (void*)&mx_peers, &size );
|
|
if( OMPI_SUCCESS != rc ) {
|
|
opal_output( 0, "mca_pml_base_modex_recv failed for peer [%d,%d,%d]",
|
|
ORTE_NAME_ARGS(&module_proc->proc_ompi->proc_name) );
|
|
OBJ_RELEASE(module_proc);
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
if( (size % sizeof(mca_btl_mx_addr_t)) != 0 ) {
|
|
opal_output( 0, "invalid mx address for peer [%d,%d,%d]",
|
|
ORTE_NAME_ARGS(&module_proc->proc_ompi->proc_name) );
|
|
OBJ_RELEASE(module_proc);
|
|
return OMPI_ERROR;
|
|
}
|
|
count = size / sizeof(mca_btl_mx_addr_t);
|
|
|
|
for( i = module_proc->proc_addr_index; i < count; i++ ) {
|
|
|
|
retry_connect:
|
|
mx_status = mx_connect( module_endpoint->endpoint_btl->mx_endpoint, mx_peers[i].nic_id, mx_peers[i].endpoint_id,
|
|
mca_btl_mx_component.mx_filter, 5, &mx_remote_addr );
|
|
if( MX_SUCCESS != mx_status ) {
|
|
opal_output( 0, "mx_connect fail for %dth remote address key %x (error %s)\n",
|
|
i, mca_btl_mx_component.mx_filter, mx_strerror(mx_status) );
|
|
if( MX_TIMEOUT == mx_status )
|
|
if( num_retry++ < 5 )
|
|
goto retry_connect;
|
|
continue;
|
|
}
|
|
module_endpoint->mx_peer.nic_id = mx_peers[i].nic_id;
|
|
module_endpoint->mx_peer.endpoint_id = mx_peers[i].endpoint_id;
|
|
module_endpoint->mx_peer_addr = mx_remote_addr;
|
|
module_proc->proc_addr_index = i;
|
|
break;
|
|
}
|
|
|
|
free( mx_peers );
|
|
|
|
if( i == count ) { /* no available connection */
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
if( NULL == module_proc->proc_endpoints ) {
|
|
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
|
|
malloc(count * sizeof(mca_btl_base_endpoint_t*));
|
|
if(NULL == module_proc->proc_endpoints) {
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
}
|
|
|
|
/* insert into endpoint array */
|
|
module_endpoint->endpoint_proc = module_proc;
|
|
module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint;
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|