openmpi/ompi/mca/btl/mx/btl_mx_component.c
George Bosilca 3078be40aa First stable version of the MX BTL (at least we pass NetPipe). The performance is not amazing,
but it is not that bad either.

On a 2-process run, with Intel(R) Xeon(TM) CPU 3.20GHz and a MYRICOM Inc. Myrinet 2000 Scalable Cluster Interconnect (rev 04), I get:

  0:       1 bytes  13096 times -->      1.10 Mbps in       6.94 usec
  1:       2 bytes  14408 times -->      2.17 Mbps in       7.02 usec
  2:       3 bytes  14243 times -->      3.24 Mbps in       7.07 usec
  3:       4 bytes   9428 times -->      4.27 Mbps in       7.15 usec
  4:       6 bytes  10493 times -->      6.26 Mbps in       7.32 usec
  5:       8 bytes   6834 times -->      8.18 Mbps in       7.47 usec
  6:      12 bytes   8371 times -->     11.89 Mbps in       7.70 usec
  7:      13 bytes   5411 times -->     12.72 Mbps in       7.80 usec
  8:      16 bytes   5919 times -->     15.35 Mbps in       7.95 usec
  9:      19 bytes   7074 times -->     17.66 Mbps in       8.21 usec
 10:      21 bytes   7696 times -->     19.00 Mbps in       8.43 usec
 11:      24 bytes   7906 times -->     20.87 Mbps in       8.77 usec
 12:      27 bytes   8073 times -->     23.05 Mbps in       8.94 usec
 13:      29 bytes   4972 times -->     24.32 Mbps in       9.10 usec
 14:      32 bytes   5307 times -->     26.29 Mbps in       9.29 usec
 15:      35 bytes   5720 times -->     33.61 Mbps in       7.95 usec
 16:      45 bytes   7191 times -->     39.50 Mbps in       8.69 usec
 17:      48 bytes   7670 times -->     41.33 Mbps in       8.86 usec
 18:      51 bytes   7759 times -->     42.80 Mbps in       9.09 usec
 19:      61 bytes   4313 times -->     47.44 Mbps in       9.81 usec
 20:      64 bytes   5012 times -->     57.61 Mbps in       8.48 usec
 21:      67 bytes   6083 times -->     59.31 Mbps in       8.62 usec
 22:      93 bytes   6234 times -->     68.08 Mbps in      10.42 usec
 23:      96 bytes   6396 times -->     80.65 Mbps in       9.08 usec
 24:      99 bytes   7455 times -->     81.56 Mbps in       9.26 usec
 25:     125 bytes   3926 times -->    112.46 Mbps in       8.48 usec
 26:     128 bytes   5848 times -->    116.87 Mbps in       8.36 usec
 27:     131 bytes   6077 times -->    119.22 Mbps in       8.38 usec
 28:     189 bytes   6192 times -->    163.79 Mbps in       8.80 usec
 29:     192 bytes   7572 times -->    168.01 Mbps in       8.72 usec
 30:     195 bytes   7705 times -->    171.13 Mbps in       8.69 usec
 31:     253 bytes   4011 times -->    210.21 Mbps in       9.18 usec
 32:     256 bytes   5423 times -->    214.55 Mbps in       9.10 usec
 33:     259 bytes   5535 times -->    217.64 Mbps in       9.08 usec
 34:     381 bytes   5613 times -->    290.55 Mbps in      10.00 usec
 35:     384 bytes   6663 times -->    296.11 Mbps in       9.89 usec
 36:     387 bytes   6764 times -->    298.74 Mbps in       9.88 usec
 37:     509 bytes   3451 times -->    353.78 Mbps in      10.98 usec
 38:     512 bytes   4546 times -->    359.36 Mbps in      10.87 usec
 39:     515 bytes   4617 times -->    361.53 Mbps in      10.87 usec
 40:     765 bytes   4645 times -->    461.41 Mbps in      12.65 usec
 41:     768 bytes   5270 times -->    468.59 Mbps in      12.50 usec
 42:     771 bytes   5341 times -->    470.16 Mbps in      12.51 usec
 43:    1021 bytes   2695 times -->    508.42 Mbps in      15.32 usec
 44:    1024 bytes   3260 times -->    514.44 Mbps in      15.19 usec
 45:    1027 bytes   3298 times -->    515.72 Mbps in      15.19 usec
 46:    1533 bytes   3307 times -->    707.12 Mbps in      16.54 usec
 47:    1536 bytes   4030 times -->    714.93 Mbps in      16.39 usec
 48:    1539 bytes   4071 times -->    714.41 Mbps in      16.44 usec
 49:    2045 bytes   2040 times -->    761.38 Mbps in      20.49 usec
 50:    2048 bytes   2438 times -->    769.78 Mbps in      20.30 usec
 51:    2051 bytes   2465 times -->    769.78 Mbps in      20.33 usec
 52:    3069 bytes   2465 times -->    923.43 Mbps in      25.36 usec
 53:    3072 bytes   2629 times -->    928.48 Mbps in      25.24 usec
 54:    3075 bytes   2642 times -->    929.07 Mbps in      25.25 usec
 55:    4093 bytes   1323 times -->   1012.38 Mbps in      30.85 usec
 56:    4096 bytes   1620 times -->   1016.69 Mbps in      30.74 usec
 57:    4099 bytes   1627 times -->   1015.16 Mbps in      30.81 usec
 58:    6141 bytes   1625 times -->   1171.82 Mbps in      39.98 usec
 59:    6144 bytes   1667 times -->   1173.85 Mbps in      39.93 usec
 60:    6147 bytes   1669 times -->   1174.44 Mbps in      39.93 usec
 61:    8189 bytes    835 times -->   1232.43 Mbps in      50.69 usec
 62:    8192 bytes    986 times -->   1234.87 Mbps in      50.61 usec
 63:    8195 bytes    988 times -->   1234.85 Mbps in      50.63 usec
 64:   12285 bytes    988 times -->   1360.73 Mbps in      68.88 usec
 65:   12288 bytes    967 times -->   1364.20 Mbps in      68.72 usec
 66:   12291 bytes    970 times -->   1364.56 Mbps in      68.72 usec
 67:   16381 bytes    485 times -->   1385.48 Mbps in      90.21 usec
 68:   16384 bytes    554 times -->   1388.76 Mbps in      90.01 usec
 69:   16387 bytes    555 times -->   1388.41 Mbps in      90.05 usec
 70:   24573 bytes    555 times -->   1499.72 Mbps in     125.01 usec
 71:   24576 bytes    533 times -->   1499.36 Mbps in     125.05 usec
 72:   24579 bytes    533 times -->   1500.44 Mbps in     124.98 usec
 73:   32765 bytes    266 times -->   1499.31 Mbps in     166.73 usec
 74:   32768 bytes    299 times -->   1497.10 Mbps in     166.99 usec
 75:   32771 bytes    299 times -->   1495.29 Mbps in     167.21 usec
 76:   49149 bytes    299 times -->   1528.78 Mbps in     245.28 usec
 77:   49152 bytes    271 times -->   1527.97 Mbps in     245.42 usec
 78:   49155 bytes    271 times -->   1529.35 Mbps in     245.22 usec
 79:   65533 bytes    135 times -->   1586.19 Mbps in     315.21 usec
 80:   65536 bytes    158 times -->   1591.11 Mbps in     314.25 usec
 81:   65539 bytes    159 times -->   1586.50 Mbps in     315.17 usec
 82:   98301 bytes    158 times -->   1668.05 Mbps in     449.61 usec
 83:   98304 bytes    148 times -->   1667.40 Mbps in     449.80 usec
 84:   98307 bytes    148 times -->   1667.29 Mbps in     449.84 usec
 85:  131069 bytes     74 times -->   1709.11 Mbps in     585.09 usec
 86:  131072 bytes     85 times -->   1711.09 Mbps in     584.42 usec
 87:  131075 bytes     85 times -->   1710.92 Mbps in     584.49 usec
 88:  196605 bytes     85 times -->   1727.93 Mbps in     868.08 usec
 89:  196608 bytes     76 times -->   1726.28 Mbps in     868.92 usec
 90:  196611 bytes     76 times -->   1727.06 Mbps in     868.54 usec
 91:  262141 bytes     38 times -->   1757.65 Mbps in    1137.87 usec
 92:  262144 bytes     43 times -->   1758.69 Mbps in    1137.21 usec
 93:  262147 bytes     43 times -->   1759.38 Mbps in    1136.78 usec
 94:  393213 bytes     43 times -->   1801.51 Mbps in    1665.25 usec
 95:  393216 bytes     40 times -->   1803.26 Mbps in    1663.65 usec
 96:  393219 bytes     40 times -->   1800.73 Mbps in    1666.00 usec
 97:  524285 bytes     20 times -->   1805.33 Mbps in    2215.65 usec
 98:  524288 bytes     22 times -->   1806.80 Mbps in    2213.86 usec
 99:  524291 bytes     22 times -->   1805.77 Mbps in    2215.14 usec
100:  786429 bytes     22 times -->   1827.24 Mbps in    3283.64 usec
101:  786432 bytes     20 times -->   1827.03 Mbps in    3284.03 usec
102:  786435 bytes     20 times -->   1827.20 Mbps in    3283.73 usec
103: 1048573 bytes     10 times -->   1840.05 Mbps in    4347.71 usec
104: 1048576 bytes     11 times -->   1839.68 Mbps in    4348.58 usec
105: 1048579 bytes     11 times -->   1840.13 Mbps in    4347.54 usec
106: 1572861 bytes     11 times -->   1853.99 Mbps in    6472.50 usec
107: 1572864 bytes     10 times -->   1854.11 Mbps in    6472.10 usec
108: 1572867 bytes     10 times -->   1854.12 Mbps in    6472.10 usec
109: 2097149 bytes      5 times -->   1861.41 Mbps in    8595.61 usec
110: 2097152 bytes      5 times -->   1861.25 Mbps in    8596.40 usec
111: 2097155 bytes      5 times -->   1860.99 Mbps in    8597.59 usec
112: 3145725 bytes      5 times -->   1868.34 Mbps in   12845.59 usec
113: 3145728 bytes      5 times -->   1868.30 Mbps in   12845.90 usec
114: 3145731 bytes      5 times -->   1868.59 Mbps in   12843.89 usec
115: 4194301 bytes      3 times -->   1872.16 Mbps in   17092.51 usec
116: 4194304 bytes      3 times -->   1872.31 Mbps in   17091.19 usec
117: 4194307 bytes      3 times -->   1872.13 Mbps in   17092.82 usec
118: 6291453 bytes      3 times -->   1875.88 Mbps in   25588.00 usec
119: 6291456 bytes      3 times -->   1875.98 Mbps in   25586.68 usec
120: 6291459 bytes      3 times -->   1875.93 Mbps in   25587.36 usec
121: 8388605 bytes      3 times -->   1877.79 Mbps in   34082.69 usec
122: 8388608 bytes      3 times -->   1877.72 Mbps in   34083.84 usec
123: 8388611 bytes      3 times -->   1877.66 Mbps in   34085.00 usec
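
For context, a NetPIPE run over this BTL would typically be launched with the mx BTL selected explicitly, along the lines of the command below; the NetPIPE MPI binary name (NPmpi) and the exact MCA selection flags are illustrative assumptions, not taken from this log:

  mpirun -np 2 --mca btl mx,self NPmpi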

This commit was SVN r7180.
2005-09-04 22:08:13 +00:00

457 lines
17 KiB
C

/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University.
 *                         All rights reserved.
 * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
 *                         All rights reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"
#include "ompi/include/constants.h"
#include "opal/event/event.h"
#include "opal/util/if.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "mca/pml/pml.h"
#include "mca/btl/btl.h"
#include "mca/base/mca_base_param.h"
#include "mca/pml/base/pml_base_module_exchange.h"
#include "mca/errmgr/errmgr.h"
#include "mca/mpool/base/base.h"
#include "btl_mx.h"
#include "btl_mx_frag.h"
#include "btl_mx_endpoint.h"
#include "mca/btl/base/base.h"

mca_btl_mx_component_t mca_btl_mx_component = {
    {
        /* First, the mca_base_component_t struct containing meta information
           about the component itself */
        {
            /* Indicate that we are a btl v1.0.0 component (which also implies
               a specific MCA version) */
            MCA_BTL_BASE_VERSION_1_0_0,

            "mx",                        /* MCA component name */
            OMPI_MAJOR_VERSION,          /* MCA component major version */
            OMPI_MINOR_VERSION,          /* MCA component minor version */
            OMPI_RELEASE_VERSION,        /* MCA component release version */
            mca_btl_mx_component_open,   /* component open */
            mca_btl_mx_component_close   /* component close */
        },

        /* Next the MCA v1.0.0 component meta data */
        {
            /* Whether the component is checkpointable or not */
            false
        },

        mca_btl_mx_component_init,
        mca_btl_mx_component_progress,
    }
};
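
/*
 * The open/close callbacks above are invoked when the component is opened and
 * closed by the MCA framework, while mca_btl_mx_component_init and
 * mca_btl_mx_component_progress are the entry points the BTL framework uses to
 * create the MX modules and to make communication progress (both are defined
 * below).
 */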

int mx_debug = 0;

/*
 * Utility routines for parameter registration.
 */
static inline char* mca_btl_mx_param_register_string(
    const char* param_name,
    const char* default_value)
{
    char *param_value;
    int id = mca_base_param_register_string("btl", "mx", param_name, NULL, default_value);
    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}

static inline int mca_btl_mx_param_register_int(
    const char* param_name,
    int default_value)
{
    int id = mca_base_param_register_int("btl", "mx", param_name, NULL, default_value);
    int param_value = default_value;
    mca_base_param_lookup_int(id, &param_value);
    return param_value;
}
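
/*
 * Note: a parameter registered here as ("btl", "mx", <name>) is exposed by the
 * MCA parameter system as "btl_mx_<name>", so it can be overridden at run time,
 * e.g. with "-mca btl_mx_free_list_max 2048" (value chosen purely for
 * illustration).
 */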

/*
 * Called by the MCA framework to open the component; registers
 * the component parameters.
 */
int mca_btl_mx_component_open(void)
{
    /* initialize state */
    mca_btl_mx_component.mx_num_btls = 0;
    mca_btl_mx_component.mx_btls = NULL;

    /* initialize objects */
    OBJ_CONSTRUCT(&mca_btl_mx_component.mx_procs, opal_list_t);

    mca_btl_mx_component.mx_max_btls =
        mca_btl_mx_param_register_int( "max_btls", 1 );
    mca_btl_mx_component.mx_filter =
        mca_btl_mx_param_register_int( "filter", 0xdeadbeef );
    mca_btl_mx_component.mx_free_list_num =
        mca_btl_mx_param_register_int( "free_list_num", 8 );
    mca_btl_mx_component.mx_free_list_inc =
        mca_btl_mx_param_register_int( "free_list_inc", 32 );
    mca_btl_mx_component.mx_free_list_max =
        mca_btl_mx_param_register_int( "free_list_max", 1024 );
    /* The ompi_free_list has a problem if (max - num) is not divisible
     * by the increment, so make sure it is.
     */
    if( (mca_btl_mx_component.mx_free_list_max - mca_btl_mx_component.mx_free_list_num) %
        mca_btl_mx_component.mx_free_list_inc ) {
        int overhead = (mca_btl_mx_component.mx_free_list_max - mca_btl_mx_component.mx_free_list_num) %
            mca_btl_mx_component.mx_free_list_inc;
        mca_btl_mx_component.mx_free_list_max -= overhead;
    }
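    /* With the default values above (num = 8, max = 1024, inc = 32) the
     * remainder is (1024 - 8) % 32 = 24, so max is trimmed down to 1000,
     * making (max - num) = 992 an exact multiple of the increment. */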
    mca_btl_mx_component.mx_max_posted_recv =
        mca_btl_mx_param_register_int( "max_posted_recv", 16 );

    mca_btl_mx_module.super.btl_exclusivity =
        mca_btl_mx_param_register_int( "exclusivity", 0 );
    mca_btl_mx_module.super.btl_eager_limit =
        mca_btl_mx_param_register_int( "first_frag_size", 64*1024 ) - sizeof(mca_btl_base_header_t);
    mca_btl_mx_module.super.btl_min_send_size =
        mca_btl_mx_param_register_int( "min_send_size", 64*1024 ) - sizeof(mca_btl_base_header_t);
    mca_btl_mx_module.super.btl_max_send_size =
        mca_btl_mx_param_register_int( "max_send_size", 128*1024 ) - sizeof(mca_btl_base_header_t);
    mca_btl_mx_module.super.btl_min_rdma_size =
        mca_btl_mx_param_register_int( "min_rdma_size", 1024*1024 );
    mca_btl_mx_module.super.btl_max_rdma_size =
        mca_btl_mx_param_register_int( "max_rdma_size", 1024*1024 );
    mca_btl_mx_module.super.btl_flags =
        mca_btl_mx_param_register_int( "flags", MCA_BTL_FLAGS_PUT );

    mx_debug = mca_btl_mx_param_register_int( "debug", 0 );

    return OMPI_SUCCESS;
}

/*
 * Component cleanup - sanity checking of queue lengths.
 */
int mca_btl_mx_component_close(void)
{
    if( NULL == mca_btl_mx_component.mx_btls )
        return OMPI_SUCCESS;

    mx_finalize();

#if OMPI_ENABLE_DEBUG
    if (mca_btl_mx_component.mx_send_eager_frags.fl_num_allocated &&
        mca_btl_mx_component.mx_send_eager_frags.fl_num_allocated !=
        mca_btl_mx_component.mx_send_eager_frags.super.opal_list_length) {
        opal_output(0, "mx send eager frags: %d allocated %d returned\n",
                    mca_btl_mx_component.mx_send_eager_frags.fl_num_allocated,
                    mca_btl_mx_component.mx_send_eager_frags.super.opal_list_length);
    }
    if (mca_btl_mx_component.mx_send_user_frags.fl_num_allocated &&
        mca_btl_mx_component.mx_send_user_frags.fl_num_allocated !=
        mca_btl_mx_component.mx_send_user_frags.super.opal_list_length) {
        opal_output(0, "mx send user frags: %d allocated %d returned\n",
                    mca_btl_mx_component.mx_send_user_frags.fl_num_allocated,
                    mca_btl_mx_component.mx_send_user_frags.super.opal_list_length);
    }
    /* allow for pre-posted receives */
    if (mca_btl_mx_component.mx_recv_frags.fl_num_allocated &&
        mca_btl_mx_component.mx_recv_frags.fl_num_allocated - 3 >
        mca_btl_mx_component.mx_recv_frags.super.opal_list_length) {
        opal_output(0, "mx recv frags: %d allocated %d returned\n",
                    mca_btl_mx_component.mx_recv_frags.fl_num_allocated,
                    mca_btl_mx_component.mx_recv_frags.super.opal_list_length);
    }
#endif

    /* release resources */
    OBJ_DESTRUCT(&mca_btl_mx_component.mx_send_eager_frags);
    OBJ_DESTRUCT(&mca_btl_mx_component.mx_send_user_frags);
    OBJ_DESTRUCT(&mca_btl_mx_component.mx_recv_frags);
    OBJ_DESTRUCT(&mca_btl_mx_component.mx_procs);
    OBJ_DESTRUCT(&mca_btl_mx_component.mx_pending_acks);
    OBJ_DESTRUCT(&mca_btl_mx_component.mx_lock);

    return OMPI_SUCCESS;
}

/*
 * Create and initialize an MX BTL module, where each module
 * represents a specific NIC.
 */
static mca_btl_mx_module_t* mca_btl_mx_create(uint64_t addr)
{
    mca_btl_mx_module_t* mx_btl;
    mx_return_t status;
    uint32_t nic_id;

    status = mx_nic_id_to_board_number( addr, &nic_id );
    if( MX_SUCCESS != status ) {
        return NULL;
    }

    mx_btl = malloc(sizeof(mca_btl_mx_module_t));
    if( NULL == mx_btl ) return NULL;

    /* copy over default settings */
    memcpy( mx_btl, &mca_btl_mx_module, sizeof(mca_btl_mx_module_t) );
    mx_btl->super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE; /* | MCA_BTL_FLAGS_PUT;*/
    OBJ_CONSTRUCT( &mx_btl->mx_peers, opal_list_t );
    OBJ_CONSTRUCT( &mx_btl->mx_lock, opal_mutex_t );

    /* open local endpoint */
    status = mx_open_endpoint( nic_id, MX_ANY_ENDPOINT,
                               mca_btl_mx_component.mx_filter,
                               NULL, 0, &mx_btl->mx_endpoint );
    if( status != MX_SUCCESS ) {
        opal_output(0, "mca_btl_mx_init: mx_open_endpoint() failed with status=%d\n", status);
        mca_btl_mx_finalize( &mx_btl->super );
        return NULL;
    }

    /* query the endpoint address */
    if( (status = mx_get_endpoint_addr( mx_btl->mx_endpoint,
                                        &mx_btl->mx_endpoint_addr )) != MX_SUCCESS ) {
        opal_output(0, "mca_btl_mx_init: mx_get_endpoint_addr() failed with status=%d\n", status);
        mca_btl_mx_finalize( &mx_btl->super );
        return NULL;
    }

    return mx_btl;
}

/*
 * MX component initialization:
 * - check if MX can be initialized
 * - construct all static objects
 */
mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
                                                  bool enable_progress_threads,
                                                  bool enable_mpi_threads)
{
    mca_btl_base_module_t** btls;
    mx_return_t status;
    uint32_t i, size, count;
    uint64_t *nic_addrs;
    mca_btl_mx_addr_t *mx_addrs;

    *num_btl_modules = 0;

    /* First check if MX is available ... */
    if( MX_SUCCESS != (status = mx_init()) ) {
        opal_output( 0, "mca_btl_mx_component_init: mx_init() failed with status = %d (%s)\n",
                     status, mx_strerror(status) );
        return NULL;
    }

    /* initialize objects */
    OBJ_CONSTRUCT(&mca_btl_mx_component.mx_send_eager_frags, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_mx_component.mx_send_user_frags, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_mx_component.mx_recv_frags, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_mx_component.mx_procs, opal_list_t);
    OBJ_CONSTRUCT(&mca_btl_mx_component.mx_pending_acks, opal_list_t);
    OBJ_CONSTRUCT(&mca_btl_mx_component.mx_lock, opal_mutex_t);

    ompi_free_list_init( &mca_btl_mx_component.mx_send_eager_frags,
                         sizeof(mca_btl_mx_frag_t) + mca_btl_mx_module.super.btl_eager_limit,
                         OBJ_CLASS(mca_btl_mx_frag_t),
                         mca_btl_mx_component.mx_free_list_num,
                         mca_btl_mx_component.mx_free_list_max,
                         mca_btl_mx_component.mx_free_list_inc,
                         NULL ); /* use default allocator */
    ompi_free_list_init( &mca_btl_mx_component.mx_send_user_frags,
                         sizeof(mca_btl_mx_frag_t),
                         OBJ_CLASS(mca_btl_mx_frag_t),
                         mca_btl_mx_component.mx_free_list_num,
                         mca_btl_mx_component.mx_free_list_max,
                         mca_btl_mx_component.mx_free_list_inc,
                         NULL ); /* use default allocator */
    ompi_free_list_init( &mca_btl_mx_component.mx_recv_frags,
                         sizeof(mca_btl_mx_frag_t),
                         OBJ_CLASS(mca_btl_mx_frag_t),
                         mca_btl_mx_component.mx_free_list_num,
                         mca_btl_mx_component.mx_free_list_max,
                         mca_btl_mx_component.mx_free_list_inc,
                         NULL ); /* use default allocator */

    /* initialize the process hash table */
    OBJ_CONSTRUCT( &mca_btl_mx_component.mx_procs, opal_list_t );

    /* set the MX error handler to always return */
    mx_set_error_handler(MX_ERRORS_RETURN);
    /* Until this BTL reaches a stable state, let the MX library assert on errors */
    /*mx_set_error_handler(MX_ERRORS_ARE_FATAL);*/

    /* get the number of cards available on the system */
    if( (status = mx_get_info( NULL, MX_NIC_COUNT, NULL, 0,
                               &mca_btl_mx_component.mx_num_btls, sizeof(uint32_t) )) != MX_SUCCESS ) {
        opal_output(0, "mca_btl_mx_component_init: mx_get_info(MX_NIC_COUNT) failed with status=%d\n", status);
        return NULL;
    }

    /* check for a limit on the number of btls */
    if( mca_btl_mx_component.mx_num_btls > mca_btl_mx_component.mx_max_btls )
        mca_btl_mx_component.mx_num_btls = mca_btl_mx_component.mx_max_btls;

    /* Now we know how many NICs are available on the system. We will create
     * a BTL for each one and then give a pointer to the BTL to the upper level.
     */
    mca_btl_mx_component.mx_btls = malloc( mca_btl_mx_component.mx_num_btls * sizeof(mca_btl_base_module_t*) );
    if( NULL == mca_btl_mx_component.mx_btls )
        return NULL;

    /* determine the NIC ids */
    size = sizeof(uint64_t) * (mca_btl_mx_component.mx_num_btls + 1);
    if( NULL == (nic_addrs = (uint64_t*)malloc(size)) )
        return NULL;
    if( (status = mx_get_info( NULL, MX_NIC_IDS, NULL, 0,
                               nic_addrs, size )) != MX_SUCCESS ) {
        free(nic_addrs);
        return NULL;
    }

    size = sizeof(mca_btl_mx_addr_t) * mca_btl_mx_component.mx_num_btls;
    mx_addrs = (mca_btl_mx_addr_t*)malloc( size );
    if( NULL == mx_addrs ) {
        free( nic_addrs );
        return NULL;
    }

    /* create a btl for each NIC */
    for( i = count = 0; i < mca_btl_mx_component.mx_num_btls; i++ ) {
        mca_btl_mx_module_t* btl = mca_btl_mx_create(nic_addrs[i]);
        if( NULL == btl ) {
            continue;
        }
        /* store the decomposed address at index "count", not "i", so the
         * published addresses stay in sync with the modules actually kept */
        status = mx_decompose_endpoint_addr( btl->mx_endpoint_addr, &(mx_addrs[count].nic_id),
                                             &(mx_addrs[count].endpoint_id) );
        if( MX_SUCCESS != status ) {
            OBJ_RELEASE( btl );
            continue;
        }
        mca_btl_mx_component.mx_btls[count++] = btl;
    }

    size = sizeof(mca_btl_mx_addr_t) * count;
    if( 0 == count ) {  /* No active BTL module */
        free( nic_addrs );
        free( mx_addrs );
        free( mca_btl_mx_component.mx_btls );
        mca_btl_mx_component.mx_btls = NULL;
        return NULL;
    }
    mca_btl_mx_component.mx_num_btls = count;

    /* publish the MX addresses via the MCA framework */
    mca_pml_base_modex_send( &mca_btl_mx_component.super.btl_version, mx_addrs, size );

    free( nic_addrs );
    free( mx_addrs );

    btls = malloc( mca_btl_mx_component.mx_num_btls * sizeof(mca_btl_base_module_t*) );
    if( NULL == btls ) {
        free( mca_btl_mx_component.mx_btls );
        mca_btl_mx_component.mx_num_btls = 0;  /* no active BTL modules */
        return NULL;
    }
    memcpy( btls, mca_btl_mx_component.mx_btls,
            mca_btl_mx_component.mx_num_btls * sizeof(mca_btl_mx_module_t*) );
    *num_btl_modules = mca_btl_mx_component.mx_num_btls;
    return btls;
}

/*
 * MX component progress.
 */
int mca_btl_mx_component_progress(void)
{
    int num_progressed = 0;
    size_t i;
    mx_status_t mx_status;
    mx_return_t mx_return;
    mx_segment_t mx_segment;
    mx_request_t mx_request;
    mca_btl_mx_frag_t* frag;

    for( i = 0; i < mca_btl_mx_component.mx_num_btls; i++ ) {
        mca_btl_mx_module_t* mx_btl = mca_btl_mx_component.mx_btls[i];
        uint32_t mx_result = 0;

        /* pre-post receive */
#if 0
        if( mx_btl->mx_recvs_posted == 0 ) {
            OPAL_THREAD_ADD32( &mx_btl->mx_recvs_posted, 1 );
            MCA_BTL_MX_POST( mx_btl, frag );
        }
#endif
        /*if( mx_btl->mx_posted_request ) { */
        mx_return = mx_ipeek( mx_btl->mx_endpoint, &mx_request, &mx_result );
        if( mx_return != MX_SUCCESS ) {
            opal_output(0, "mca_btl_mx_component_progress: mx_ipeek() failed with status %d\n",
                        mx_return);
            continue;
        }
        if( mx_result == 0 ) {
            continue;
        }

        mx_return = mx_test( mx_btl->mx_endpoint, &mx_request, &mx_status, &mx_result );
        if( mx_return != MX_SUCCESS ) {
            opal_output(0, "mca_btl_mx_progress: mx_test() failed with status=%d\n",
                        mx_return);
            continue;
        }

        frag = mx_status.context;
        if( 0 == frag->base.des_dst_cnt ) {  /* it's a send */
            /* call the completion callback */
            frag->base.des_cbfunc( &(mx_btl->super), frag->endpoint, &(frag->base), OMPI_SUCCESS );
        } else {                             /* and this one is a receive */
            mca_btl_base_recv_reg_t* reg;

            reg = &(mx_btl->mx_reg[frag->tag]);
            frag->base.des_dst->seg_len = mx_status.msg_length;
            reg->cbfunc( &(mx_btl->super), frag->tag, &(frag->base), reg->cbdata );
            /*
             * The upper level has extracted the data from the fragment, so now
             * we can register the fragment again with the MX BTL.
             */
            mx_segment.segment_ptr = frag->base.des_dst->seg_addr.pval;
            mx_segment.segment_length = mca_btl_mx_module.super.btl_eager_limit;
            mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1, (uint64_t)frag->tag, 0xffffffffffffffffULL,
                                  frag, &(frag->mx_request) );
            if( MX_SUCCESS != mx_return ) {
                opal_output( 0, "Failed to re-register a fragment with the MX NIC ...\n" );
            }
        }
        /*MCA_BTL_MX_PROGRESS(mx_btl, mx_status);*/
        /*
         * mx_status now holds the context pointer attached to the request, which
         * indicates which fragment we were working on, together with the status
         * of the operation, so we know what we are supposed to do next.
         */
        num_progressed++;
    }
    return num_progressed;
}