From d06091cf841eb663fe251883fbea065317cf28cc Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Sat, 1 Nov 2008 17:05:48 +0000 Subject: [PATCH] Completely refactor the connection procedure for MX. It is now cleaner, and in case of failure more user friendly. In addition we now fully integrate with the MX_BONDING features (i.e the multi-rails bonding can be done either at the PML OB1 level, or deep down in the MX library). This feature is driven by the btl_mx_bonding MCA parameter. Plus some extra features: - Correctly compute the bandwidth in case of bonding. - Nicely handle any mapper access errors. This commit was SVN r19874. --- ompi/mca/btl/mx/btl_mx_component.c | 364 +++++++++++++++-------------- 1 file changed, 192 insertions(+), 172 deletions(-) diff --git a/ompi/mca/btl/mx/btl_mx_component.c b/ompi/mca/btl/mx/btl_mx_component.c index 1b107571eb..42df8aa6f4 100644 --- a/ompi/mca/btl/mx/btl_mx_component.c +++ b/ompi/mca/btl/mx/btl_mx_component.c @@ -83,7 +83,7 @@ int mca_btl_mx_component_open(void) OBJ_CONSTRUCT(&mca_btl_mx_component.mx_procs, opal_list_t); mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "max_btls", "Maximum number of accepted Myrinet cards", - false, false, 10, &mca_btl_mx_component.mx_max_btls ); + false, false, 8, &mca_btl_mx_component.mx_max_btls ); mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "timeout", "Timeout for connections", false, false, MX_INFINITE, &mca_btl_mx_component.mx_timeout ); @@ -100,8 +100,22 @@ int mca_btl_mx_component_open(void) "Enable the MX support for shared memory", false, false, 0, &mca_btl_mx_component.mx_support_sharedmem ); mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "bonding", - "True if the lib MX is in charge of doing the device bonding, false if Open MPI will take care of it.", - false, false, 0, &mca_btl_mx_component.mx_bonding ); + "Integrate MX library bonding. Less than 0 is system default, everything else will set the MX_BONDING to the value.", + false, false, 1, &mca_btl_mx_component.mx_bonding ); + if( 0 >= mca_btl_mx_component.mx_bonding ) { + char* value = getenv("MX_BONDING"); + if( NULL == value ) { + mca_btl_mx_component.mx_bonding = 1; + } else { + mca_btl_mx_component.mx_bonding = atoi(value); + if( 0 >= mca_btl_mx_component.mx_bonding ) + mca_btl_mx_component.mx_bonding = 1; + } + } else if( 1 != mca_btl_mx_component.mx_bonding ) { + char value[8]; + snprintf( value, 8, "%d\n", mca_btl_mx_component.mx_bonding ); + opal_setenv( "MX_BONDING", value, true, &environ ); + } #ifdef HAVE_MX_REGISTER_UNEXP_HANDLER mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "register_unexp", "Enable the MX support for the unexpected request handler (Open MPI matching)", @@ -144,15 +158,22 @@ int mca_btl_mx_component_open(void) mca_btl_mx_module.super.btl_rdma_pipeline_send_length = 256*1024; mca_btl_mx_module.super.btl_rdma_pipeline_frag_size = 8*1024*1024; mca_btl_mx_module.super.btl_min_rdma_pipeline_size = 0; - mca_btl_mx_module.super.btl_flags = - MCA_BTL_FLAGS_SEND_INPLACE | - MCA_BTL_FLAGS_PUT | - MCA_BTL_FLAGS_SEND | - MCA_BTL_FLAGS_RDMA_MATCHED; + mca_btl_mx_module.super.btl_flags = (MCA_BTL_FLAGS_SEND_INPLACE | + MCA_BTL_FLAGS_PUT | + MCA_BTL_FLAGS_SEND | + MCA_BTL_FLAGS_RDMA_MATCHED); mca_btl_mx_module.super.btl_bandwidth = 2000; mca_btl_mx_module.super.btl_latency = 5; mca_btl_base_param_register(&mca_btl_mx_component.super.btl_version, - &mca_btl_mx_module.super); + &mca_btl_mx_module.super); + + if( 0 == mca_btl_mx_component.mx_support_sharedmem ) + opal_setenv( "MX_DISABLE_SHMEM", "1", true, &environ ); + if( 0 == mca_btl_mx_component.mx_support_self ) + opal_setenv( "MX_DISABLE_SELF", "1", true, &environ ); + /* Force the long pipeline (up to 4Kb fragments) */ + opal_setenv( "MX_PIPELINE_LOG", "0", true, &environ ); + return OMPI_SUCCESS; } @@ -236,168 +257,137 @@ mca_btl_mx_unexpected_handler( void *context, mx_endpoint_addr_t source, /* * Create and initialize an MX BTL module, where each module - * represents a specific NIC. + * represents a specific NIC or a specific bonded set of NICS. */ -static mca_btl_mx_module_t* mca_btl_mx_create(uint64_t addr) +static mca_btl_mx_module_t* mca_btl_mx_create(uint32_t board_num) { mca_btl_mx_module_t* mx_btl; + mx_endpoint_t mx_endpoint; + mx_endpoint_addr_t mx_endpoint_addr; mx_return_t status; - uint32_t nic_id, mx_unique_network_id = 0; - char mapper_mac[7], *where; + uint32_t endpoint_id, mx_unique_network_id = 0; + uint64_t nic_id; - status = mx_nic_id_to_board_number( addr, &nic_id ); + /* open local endpoint */ + status = mx_open_endpoint( board_num, MX_ANY_ENDPOINT, + mca_btl_mx_component.mx_filter, + NULL, 0, &mx_endpoint); + if(status != MX_SUCCESS) { + opal_output( 0, "mca_btl_mx_init: mx_open_endpoint() failed with status %d (%s)\n", + status, mx_strerror(status) ); + return NULL; + } + + /* query the endpoint address */ + if((status = mx_get_endpoint_addr( mx_endpoint, + &mx_endpoint_addr)) != MX_SUCCESS) { + opal_output( 0, "mca_btl_mx_init: mx_get_endpoint_addr() failed with status %d (%s)\n", + status, mx_strerror(status) ); + mx_close_endpoint(mx_endpoint); + return NULL; + } + + status = mx_decompose_endpoint_addr(mx_endpoint_addr, &nic_id, &endpoint_id); if( MX_SUCCESS != status ) { + opal_output( 0, "mca_btl_mx_init: mx_decompose_endpoint_addr() failed with status %d (%s)\n", + status, mx_strerror(status) ); + mx_close_endpoint(mx_endpoint); + return NULL; + } + status = mx_nic_id_to_board_number(nic_id, &board_num); + if( MX_SUCCESS != status ) { + opal_output( 0, "mca_btl_mx_init: mx_nic_id_to_board_number() failed with status %d (%s)\n", + status, mx_strerror(status) ); + mx_close_endpoint(mx_endpoint); return NULL; } #if MX_HAVE_MAPPER_STATE { - mx_return_t ret; mx_endpt_handle_t endp_handle; mx_mapper_state_t ms; + char mapper_mac[7], *where; - ret = mx_open_board( nic_id, &endp_handle ); - if( MX_SUCCESS != ret ) { - opal_output( 0, "Unable to open board %d: %s\n", nic_id, mx_strerror(ret) ); + status = mx_open_board( board_num, &endp_handle ); + if( MX_SUCCESS != status ) { + opal_output( 0, "Unable to open board %d: %s\n", board_num, mx_strerror(status) ); + mx_close_endpoint(mx_endpoint); return NULL; } - ms.board_number = nic_id; + ms.board_number = board_num; ms.iport = 0; - ret = mx__get_mapper_state( endp_handle, &ms ); - if( MX_SUCCESS != ret ) { + status = mx__get_mapper_state( endp_handle, &ms ); + if( MX_SUCCESS != status ) { opal_output( 0, "get_mapper_state failed for board %d: %s\n", - nic_id, mx_strerror(ret) ); + board_num, mx_strerror(status) ); + mx_close_endpoint(mx_endpoint); return NULL; } - /* Keep the first 4 bytes for the network speed */ - mx_unique_network_id = ((ms.mapper_mac[3] << 16) + + /* Keep the first 4 bytes for the network speed */ + mx_unique_network_id = ((ms.mapper_mac[3] << 16) + (ms.mapper_mac[4] << 8) + (ms.mapper_mac[5])); - } + /* Try to figure out if we are allowed to use this network */ + snprintf( mapper_mac, 7, "%6x", mx_unique_network_id ); - /* Try to figure out if we are allowed to use this network */ - snprintf( mapper_mac, 7, "%6x", mx_unique_network_id ); - - if( (NULL != mca_btl_mx_component.mx_if_exclude) && - (NULL != (where = strstr(mca_btl_mx_component.mx_if_exclude, mapper_mac))) ) { - /*opal_output( 0, "MX network %d connected to the mapper %s has been excluded\n", - nic_id, mapper_mac );*/ - return NULL; - } - else if( (NULL != mca_btl_mx_component.mx_if_include) && - (NULL == (where = strstr(mca_btl_mx_component.mx_if_include, mapper_mac))) ) { - /*opal_output( 0, "MX network %d connected to the mapper %s has not been included\n", - nic_id, mapper_mac );*/ - return NULL; + if( (NULL != mca_btl_mx_component.mx_if_exclude) && + (NULL != (where = strstr(mca_btl_mx_component.mx_if_exclude, mapper_mac))) ) { + mx_close_endpoint(mx_endpoint); + return NULL; + } + else if( (NULL != mca_btl_mx_component.mx_if_include) && + (NULL == (where = strstr(mca_btl_mx_component.mx_if_include, mapper_mac))) ) { + mx_close_endpoint(mx_endpoint); + return NULL; + } } #endif /* MX_HAVE_MAPPER_STATE */ mx_btl = malloc(sizeof(mca_btl_mx_module_t)); - if( NULL == mx_btl ) return NULL; + if( NULL == mx_btl ) { + opal_output( 0, "mca_btl_mx_init: unable to allocate %d bytes of memory\n", + sizeof(mca_btl_mx_module_t) ); + mx_close_endpoint(mx_endpoint); + return NULL; + } /* copy over default settings */ memcpy( mx_btl, &mca_btl_mx_module, sizeof(mca_btl_mx_module_t) ); OBJ_CONSTRUCT( &mx_btl->mx_peers, opal_list_t ); OBJ_CONSTRUCT( &mx_btl->mx_lock, opal_mutex_t ); - /* open local endpoint */ - status = mx_open_endpoint( nic_id, MX_ANY_ENDPOINT, - mca_btl_mx_component.mx_filter, - NULL, 0, &mx_btl->mx_endpoint); - if(status != MX_SUCCESS) { - opal_output( 0, "mca_btl_mx_init: mx_open_endpoint() failed with status %d (%s)\n", - status, mx_strerror(status) ); - mx_btl->mx_endpoint = NULL; - mca_btl_mx_finalize( &mx_btl->super ); - return NULL; - } - mx_btl->mx_unique_network_id = mx_unique_network_id; - mx_btl->super.btl_bandwidth = 1000; /* whatever */ + mx_btl->mx_endpoint = mx_endpoint; + mx_btl->mx_endpoint_addr = mx_endpoint_addr; + + mx_btl->super.btl_bandwidth = 2000; /* whatever */ mx_btl->super.btl_latency = 10; #if defined(MX_HAS_NET_TYPE) { - int value; + int value, board = board_num; if( (status = mx_get_info( mx_btl->mx_endpoint, MX_LINE_SPEED, - &nic_id, sizeof(nic_id), + &board, sizeof(board), &value, sizeof(int))) != MX_SUCCESS ) { opal_output( 0, "mx_get_info(MX_LINE_SPEED) failed with status %d (%s)\n", status, mx_strerror(status) ); } else { if( MX_SPEED_2G == value ) { - mx_btl->mx_unique_network_id |= 0xaa000000; + mx_unique_network_id |= 0xaa000000; mx_btl->super.btl_bandwidth = 2000; mx_btl->super.btl_latency = 5; } else if( MX_SPEED_10G == value ) { - mx_btl->mx_unique_network_id |= 0xbb000000; + mx_unique_network_id |= 0xbb000000; mx_btl->super.btl_bandwidth = 10000; mx_btl->super.btl_latency = 3; } else { - mx_btl->mx_unique_network_id |= 0xcc000000; + mx_unique_network_id |= 0xcc000000; } } } #endif /* defined(MX_HAS_NET_TYPE) */ + mx_btl->super.btl_bandwidth *= mca_btl_mx_component.mx_bonding; + mx_btl->mx_unique_network_id = mx_unique_network_id; -#if 0 - { - int counters, board, i, value, *counters_value; - char text[MX_MAX_STR_LEN]; - char *counters_name; - if( (status = mx_get_info( mx_btl->mx_endpoint, MX_PIO_SEND_MAX, NULL, 0, - &value, sizeof(int))) != MX_SUCCESS ) { - opal_output( 0, "mx_get_info(MX_PIO_SEND_MAX) failed with status %d (%s)\n", - status, mx_strerror(status) ); - } - printf( "MX_PIO_SEND_MAX = %d\n", value ); - if( (status = mx_get_info( mx_btl->mx_endpoint, MX_COPY_SEND_MAX, NULL, 0, - &value, sizeof(int))) != MX_SUCCESS ) { - opal_output( 0, "mx_get_info(MX_COPY_SEND_MAX) failed with status %d (%s)\n", - status, mx_strerror(status) ); - } - printf( "MX_COPY_SEND_MAX = %d\n", value ); - - board = 0; - if( (status = mx_get_info( mx_btl->mx_endpoint, MX_PRODUCT_CODE, &board, sizeof(int), - text, MX_MAX_STR_LEN)) != MX_SUCCESS ) { - opal_output( 0, "mx_get_info(MX_PRODUCT_CODE) failed with status %d (%s)\n", - status, mx_strerror(status) ); - } - printf( "product code %s\n", text ); - - if( (status = mx_get_info( mx_btl->mx_endpoint, MX_COUNTERS_COUNT, &board, sizeof(int), - &counters, sizeof(int))) != MX_SUCCESS ) { - opal_output( 0, "mx_get_info(MX_COUNTERS_COUNT) failed with status %d (%s)\n", - status, mx_strerror(status) ); - } - printf( "counters = %d\n", counters ); - counters_name = (char*)malloc( counters * MX_MAX_STR_LEN ); - if( (status = mx_get_info( mx_btl->mx_endpoint, MX_COUNTERS_LABELS, &board, sizeof(int), - counters_name, counters * MX_MAX_STR_LEN)) != MX_SUCCESS ) { - opal_output( 0, "mx_get_info(MX_COUNTERS_LABELS) failed with status %d (%s)\n", - status, mx_strerror(status) ); - } - counters_value = (int*)malloc( counters * sizeof(int) ); - if( (status = mx_get_info( mx_btl->mx_endpoint, MX_COUNTERS_VALUES, &board, sizeof(int), - counters_value, counters * sizeof(int))) != MX_SUCCESS ) { - opal_output( 0, "mx_get_info(MX_COUNTERS_VALUES) failed with status %d (%s)\n", - status, mx_strerror(status) ); - } - for( i = 0; i < counters; i++ ) - printf( "%d -> %s = %d\n", i, counters_name + i * MX_MAX_STR_LEN, - counters_value[i] ); - free( counters_name ); - free( counters_value ); - } -#endif - /* query the endpoint address */ - if((status = mx_get_endpoint_addr( mx_btl->mx_endpoint, - &mx_btl->mx_endpoint_addr)) != MX_SUCCESS) { - opal_output( 0, "mca_btl_mx_init: mx_get_endpoint_addr() failed with status %d (%s)\n", - status, mx_strerror(status) ); - mca_btl_mx_finalize( &mx_btl->super ); - return NULL; - } #ifdef HAVE_MX_REGISTER_UNEXP_HANDLER if( mca_btl_mx_component.mx_use_unexpected ) { status = mx_register_unexp_handler( mx_btl->mx_endpoint, mca_btl_mx_unexpected_handler, @@ -405,8 +395,8 @@ static mca_btl_mx_module_t* mca_btl_mx_create(uint64_t addr) if( MX_SUCCESS != status ) { opal_output( 0, "mca_btl_mx_init: mx_register_unexp_handler() failed with status %d (%s)\n", status, mx_strerror(status) ); - mca_btl_mx_finalize( &mx_btl->super ); - return NULL; + /* switch to a mode without the unexpected handler */ + mca_btl_mx_component.mx_use_unexpected = 0; } } #endif /* HAVE_MX_REGISTER_UNEXP_HANDLER */ @@ -425,9 +415,8 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules, { mca_btl_base_module_t** btls; mx_return_t status; - uint32_t size, count; + uint32_t count; int32_t i; - uint64_t *nic_addrs = NULL; mca_btl_mx_addr_t *mx_addrs; *num_btl_modules = 0; @@ -440,13 +429,6 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules, */ /*(void)ompi_common_mx_finalize();*/ - if( 0 == mca_btl_mx_component.mx_support_sharedmem ) - opal_setenv( "MX_DISABLE_SHMEM", "1", true, &environ ); - if( 0 == mca_btl_mx_component.mx_support_self ) - opal_setenv( "MX_DISABLE_SELF", "1", true, &environ ); - /* Force the long pipeline (up to 4Kb fragments) */ - opal_setenv( "MX_PIPELINE_LOG", "0", true, &environ ); - /* set the MX error handle to always return. This function is the only MX function * allowed to be called before mx_init in order to make sure that if the MX is not * up and running the MX library does not exit the application. @@ -459,6 +441,27 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules, return NULL; } + /* get the number of card available on the system */ + if( MX_SUCCESS != (status = mx_get_info( NULL, MX_NIC_COUNT, NULL, 0, + &mca_btl_mx_component.mx_num_btls, + sizeof(uint32_t))) ) { + opal_output( 0, "mca_btl_mx_component_init: mx_get_info(MX_NIC_COUNT) failed with status %d(%s)\n", + status, mx_strerror(status) ); + return NULL; + } + /* Don't forget the bonding rules ... */ + assert( mca_btl_mx_component.mx_bonding >= 1 ); + mca_btl_mx_component.mx_num_btls /= mca_btl_mx_component.mx_bonding; + + if (0 == mca_btl_mx_component.mx_num_btls) { + mca_btl_base_error_no_nics("Myrinet/MX", "NIC"); + return NULL; + } + /* Limit ourselves to the number of devices requested by the users. */ + if( mca_btl_mx_component.mx_num_btls > mca_btl_mx_component.mx_max_btls ) { + mca_btl_mx_component.mx_num_btls = mca_btl_mx_component.mx_max_btls; + } + /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_mx_component.mx_send_eager_frags, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_mx_component.mx_send_user_frags, ompi_free_list_t); @@ -488,65 +491,33 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules, /* intialize process hash table */ OBJ_CONSTRUCT( &mca_btl_mx_component.mx_procs, opal_list_t ); - if( mca_btl_mx_component.mx_bonding ) { - mca_btl_mx_component.mx_num_btls = 1; /* there is only one! */ - } else { - /* get the number of card available on the system */ - if( (status = mx_get_info( NULL, MX_NIC_COUNT, NULL, 0, - &mca_btl_mx_component.mx_num_btls, sizeof(uint32_t))) != MX_SUCCESS ) { - opal_output( 0, "mca_btl_mx_component_init: mx_get_info(MX_NIC_COUNT) failed with status %d(%s)\n", - status, mx_strerror(status) ); - return NULL; - } - - if (0 == mca_btl_mx_component.mx_num_btls) { - mca_btl_base_error_no_nics("Myrinet/MX", "NIC"); - return NULL; - } - - /* determine the NIC ids */ - size = sizeof(uint64_t) * (mca_btl_mx_component.mx_num_btls + 1); - if( NULL == (nic_addrs = (uint64_t*)malloc(size)) ) - return NULL; - if( (status = mx_get_info( NULL, MX_NIC_IDS, NULL, 0, - nic_addrs, size)) != MX_SUCCESS) { - opal_output(0, "mca_btl_mx_component_init: mx_get_info(MX_NICS_IDS) failed size = %ld [%s] #cards %d\n", - (unsigned long)size, mx_strerror(status), mca_btl_mx_component.mx_num_btls ); - free(nic_addrs); - return NULL; - } - /* - * Limit ourselves to the number of devices requested by the users. - */ - if( mca_btl_mx_component.mx_num_btls > mca_btl_mx_component.mx_max_btls ) { - mca_btl_mx_component.mx_num_btls = mca_btl_mx_component.mx_max_btls; - } - } - /* Now we know how many NIC are available on the system. We will create a BTL * for each one and then give a pointer to the BTL to the upper level. */ - mca_btl_mx_component.mx_btls = malloc( mca_btl_mx_component.mx_num_btls * sizeof(mca_btl_base_module_t*) ); + mca_btl_mx_component.mx_btls = malloc( mca_btl_mx_component.mx_num_btls * + sizeof(mca_btl_base_module_t*) ); if( NULL == mca_btl_mx_component.mx_btls ) { - if( NULL != nic_addrs) free( nic_addrs ); opal_output( 0, "MX BTL unable to allocate memory\n" ); return NULL; } - mx_addrs = (mca_btl_mx_addr_t*)calloc( mca_btl_mx_component.mx_num_btls, sizeof(mca_btl_mx_addr_t) ); + mx_addrs = (mca_btl_mx_addr_t*)calloc( mca_btl_mx_component.mx_num_btls, + sizeof(mca_btl_mx_addr_t) ); if( NULL == mx_addrs ) { - if( NULL != nic_addrs) free( nic_addrs ); opal_output( 0, "MX BTL unable to allocate memory\n" ); + free(mca_btl_mx_component.mx_btls); + mca_btl_mx_component.mx_btls = NULL; return NULL; } /* create a btl for each NIC */ for( i = count = 0; i < mca_btl_mx_component.mx_num_btls; i++ ) { - mca_btl_mx_module_t* mx_btl = mca_btl_mx_create(nic_addrs[i]); + mca_btl_mx_module_t* mx_btl = mca_btl_mx_create(MX_ANY_NIC); if( NULL == mx_btl ) { continue; } - status = mx_decompose_endpoint_addr( mx_btl->mx_endpoint_addr, &(mx_addrs[count].nic_id), + status = mx_decompose_endpoint_addr( mx_btl->mx_endpoint_addr, + &(mx_addrs[count].nic_id), &(mx_addrs[count].endpoint_id) ); if( MX_SUCCESS != status ) { mca_btl_mx_finalize( &mx_btl->super ); @@ -564,7 +535,6 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules, *num_btl_modules = count; if( 0 == count ) { /* No active BTL module */ - free(nic_addrs); free(mx_addrs); free(mca_btl_mx_component.mx_btls); mca_btl_mx_component.mx_btls = NULL; @@ -575,11 +545,9 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules, ompi_modex_send(&mca_btl_mx_component.super.btl_version, mx_addrs, sizeof(mca_btl_mx_addr_t) * mca_btl_mx_component.mx_num_btls); - free( nic_addrs ); free( mx_addrs ); btls = malloc( mca_btl_mx_component.mx_num_btls * sizeof(mca_btl_base_module_t*) ); - if( NULL == btls ) { free( mca_btl_mx_component.mx_btls ); mca_btl_mx_component.mx_num_btls = 0; /* no active BTL modules */ @@ -587,6 +555,7 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules, } memcpy( btls, mca_btl_mx_component.mx_btls, mca_btl_mx_component.mx_num_btls*sizeof(mca_btl_mx_module_t*) ); + return btls; } @@ -679,3 +648,54 @@ int mca_btl_mx_component_progress(void) return num_progressed; } +#if 0 + { + int counters, board, i, value, *counters_value; + char text[MX_MAX_STR_LEN]; + char *counters_name; + if( (status = mx_get_info( mx_btl->mx_endpoint, MX_PIO_SEND_MAX, NULL, 0, + &value, sizeof(int))) != MX_SUCCESS ) { + opal_output( 0, "mx_get_info(MX_PIO_SEND_MAX) failed with status %d (%s)\n", + status, mx_strerror(status) ); + } + printf( "MX_PIO_SEND_MAX = %d\n", value ); + if( (status = mx_get_info( mx_btl->mx_endpoint, MX_COPY_SEND_MAX, NULL, 0, + &value, sizeof(int))) != MX_SUCCESS ) { + opal_output( 0, "mx_get_info(MX_COPY_SEND_MAX) failed with status %d (%s)\n", + status, mx_strerror(status) ); + } + printf( "MX_COPY_SEND_MAX = %d\n", value ); + + board = 0; + if( (status = mx_get_info( mx_btl->mx_endpoint, MX_PRODUCT_CODE, &board, sizeof(int), + text, MX_MAX_STR_LEN)) != MX_SUCCESS ) { + opal_output( 0, "mx_get_info(MX_PRODUCT_CODE) failed with status %d (%s)\n", + status, mx_strerror(status) ); + } + printf( "product code %s\n", text ); + + if( (status = mx_get_info( mx_btl->mx_endpoint, MX_COUNTERS_COUNT, &board, sizeof(int), + &counters, sizeof(int))) != MX_SUCCESS ) { + opal_output( 0, "mx_get_info(MX_COUNTERS_COUNT) failed with status %d (%s)\n", + status, mx_strerror(status) ); + } + printf( "counters = %d\n", counters ); + counters_name = (char*)malloc( counters * MX_MAX_STR_LEN ); + if( (status = mx_get_info( mx_btl->mx_endpoint, MX_COUNTERS_LABELS, &board, sizeof(int), + counters_name, counters * MX_MAX_STR_LEN)) != MX_SUCCESS ) { + opal_output( 0, "mx_get_info(MX_COUNTERS_LABELS) failed with status %d (%s)\n", + status, mx_strerror(status) ); + } + counters_value = (int*)malloc( counters * sizeof(int) ); + if( (status = mx_get_info( mx_btl->mx_endpoint, MX_COUNTERS_VALUES, &board, sizeof(int), + counters_value, counters * sizeof(int))) != MX_SUCCESS ) { + opal_output( 0, "mx_get_info(MX_COUNTERS_VALUES) failed with status %d (%s)\n", + status, mx_strerror(status) ); + } + for( i = 0; i < counters; i++ ) + printf( "%d -> %s = %d\n", i, counters_name + i * MX_MAX_STR_LEN, + counters_value[i] ); + free( counters_name ); + free( counters_value ); + } +#endif