1
1

Bring over the functionality from the /tmp/jnysal-openib-wireup

branch:

 * Support btl_openib_if_include and btl_openib_if_exclude MCA
   parameters, similar to those supported by other BTLs.  Each takes a
   comma-delimited list of identifiers.  Identifiers can be HCA
   interface names (e.g., ipath0, mthca1, etc.)  or an HCA interface
   name and port numbers (e.g., ipath0:1, mthca1:2, etc.).  It is an
   error to specify both _include and _exclude.  If you specify a
   nonexistent (or non-ACTIVE) HCA and/or port, you'll get a warning
   unless you disable the warning by setting the MCA parameter
   btl_openib_warn_nonexistent_if to 0.
 * Start updating to use BEGIN_C_DECLS and END_C_DECLS
 * A few other minor fixes that were picked up along the way.

This commit was SVN r15063.
Этот коммит содержится в:
Jeff Squyres 2007-06-14 01:59:25 +00:00
родитель de0f1eef89
Коммит 1e18265c16
4 изменённых файлов: 274 добавлений и 21 удалений

Просмотреть файл

@ -44,9 +44,7 @@
#include "btl_openib_frag.h" #include "btl_openib_frag.h"
#if defined(c_plusplus) || defined(__cplusplus) BEGIN_C_DECLS
extern "C" {
#endif
#define MCA_BTL_IB_LEAVE_PINNED 1 #define MCA_BTL_IB_LEAVE_PINNED 1
#define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll #define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
@ -129,6 +127,10 @@ struct mca_btl_openib_component_t {
#if OMPI_HAVE_POSIX_THREADS #if OMPI_HAVE_POSIX_THREADS
int32_t fatal_counter; /**< Counts number on fatal events that we got on all hcas */ int32_t fatal_counter; /**< Counts number on fatal events that we got on all hcas */
#endif #endif
char *if_include;
char **if_include_list;
char *if_exclude;
char **if_exclude_list;
/** Colon-delimited list of filenames for HCA parameters */ /** Colon-delimited list of filenames for HCA parameters */
char *hca_params_file_names; char *hca_params_file_names;
@ -142,6 +144,13 @@ struct mca_btl_openib_component_t {
/** Whether we want a warning if non default GID prefix is not configured /** Whether we want a warning if non default GID prefix is not configured
on multiport setup */ on multiport setup */
bool warn_default_gid_prefix; bool warn_default_gid_prefix;
/** Whether we want a warning if the user specifies a non-existent
HCA and/or port via btl_openib_if_[in|ex]clude MCA params */
bool warn_nonexistent_if;
/** Dummy argv-style list; a copy of names from the
if_[in|ex]clude list that we use for error checking (to ensure
that they all exist) */
char **if_list;
#ifdef HAVE_IBV_FORK_INIT #ifdef HAVE_IBV_FORK_INIT
/** Whether we want fork support or not */ /** Whether we want fork support or not */
int want_fork_support; int want_fork_support;
@ -505,7 +514,6 @@ static inline int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl,
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
#if defined(c_plusplus) || defined(__cplusplus) END_C_DECLS
}
#endif
#endif /* MCA_BTL_IB_H */ #endif /* MCA_BTL_IB_H */

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -32,6 +32,7 @@
#include "ompi/mca/btl/btl.h" #include "ompi/mca/btl/btl.h"
#include "opal/sys/timer.h" #include "opal/sys/timer.h"
#include "opal/sys/atomic.h" #include "opal/sys/atomic.h"
#include "opal/util/argv.h"
#include "opal/mca/base/mca_base_param.h" #include "opal/mca/base/mca_base_param.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
@ -87,6 +88,7 @@ static void btl_openib_frag_progress_pending(
static int openib_reg_mr(void *reg_data, void *base, size_t size, static int openib_reg_mr(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg); mca_mpool_base_registration_t *reg);
static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg); static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
static int get_port_list(mca_btl_openib_hca_t *hca, int *allowed_ports);
#if OMPI_HAVE_POSIX_THREADS #if OMPI_HAVE_POSIX_THREADS
void* btl_openib_async_thread(void *one_hca); void* btl_openib_async_thread(void *one_hca);
#endif #endif
@ -461,10 +463,11 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
{ {
struct mca_mpool_base_resources_t mpool_resources; struct mca_mpool_base_resources_t mpool_resources;
mca_btl_openib_hca_t *hca; mca_btl_openib_hca_t *hca;
uint8_t i; uint8_t i, k = 0;
int ret = -1; int ret = -1, port_cnt;
ompi_btl_openib_ini_values_t values, default_values; ompi_btl_openib_ini_values_t values, default_values;
int *allowed_ports;
hca = malloc(sizeof(mca_btl_openib_hca_t)); hca = malloc(sizeof(mca_btl_openib_hca_t));
if(NULL == hca){ if(NULL == hca){
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__)); BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
@ -486,7 +489,13 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
ibv_get_device_name(ib_dev), strerror(errno))); ibv_get_device_name(ib_dev), strerror(errno)));
goto close_hca; goto close_hca;
} }
/* If mca_btl_if_include/exclude were specified, get usable ports */
allowed_ports = (int*) malloc(hca->ib_dev_attr.phys_port_cnt * sizeof(int));
port_cnt = get_port_list(hca, allowed_ports);
if(0 == port_cnt) {
ret = OMPI_SUCCESS;
goto close_hca;
}
/* Load in vendor/part-specific HCA parameters. Note that even if /* Load in vendor/part-specific HCA parameters. Note that even if
we don't find values for this vendor/part, "values" will be set we don't find values for this vendor/part, "values" will be set
indicating that it does not have good values */ indicating that it does not have good values */
@ -583,17 +592,16 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
ret = OMPI_SUCCESS; ret = OMPI_SUCCESS;
/* Note ports are 1 based hence j = 1 */ /* Note ports are 1 based (i >= 1) */
for(i = 1; i <= hca->ib_dev_attr.phys_port_cnt; i++){ for(k = 0; k < port_cnt; k++){
struct ibv_port_attr ib_port_attr; struct ibv_port_attr ib_port_attr;
i = allowed_ports[k];
if(ibv_query_port(hca->ib_dev_context, i, &ib_port_attr)){ if(ibv_query_port(hca->ib_dev_context, i, &ib_port_attr)){
BTL_ERROR(("error getting port attributes for device %s " BTL_ERROR(("error getting port attributes for device %s "
"port number %d errno says %s", "port number %d errno says %s",
ibv_get_device_name(ib_dev), i, strerror(errno))); ibv_get_device_name(ib_dev), i, strerror(errno)));
break; break;
} }
if(IBV_PORT_ACTIVE == ib_port_attr.state){ if(IBV_PORT_ACTIVE == ib_port_attr.state){
if (0 == mca_btl_openib_component.ib_pkey_val) { if (0 == mca_btl_openib_component.ib_pkey_val) {
@ -663,6 +671,9 @@ dealloc_pd:
ibv_dealloc_pd(hca->ib_pd); ibv_dealloc_pd(hca->ib_pd);
close_hca: close_hca:
ibv_close_device(hca->ib_dev_context); ibv_close_device(hca->ib_dev_context);
if(NULL != allowed_ports) {
free(allowed_ports);
}
free_hca: free_hca:
free(hca); free(hca);
return ret; return ret;
@ -705,7 +716,7 @@ btl_openib_component_init(int *num_btl_modules,
/* Read in INI files with HCA-specific parameters */ /* Read in INI files with HCA-specific parameters */
if (OMPI_SUCCESS != (ret = ompi_btl_openib_ini_init())) { if (OMPI_SUCCESS != (ret = ompi_btl_openib_ini_init())) {
return NULL; goto no_btls;
} }
#if OMPI_HAVE_POSIX_THREADS #if OMPI_HAVE_POSIX_THREADS
/* Set the fatal counter to zero */ /* Set the fatal counter to zero */
@ -724,14 +735,36 @@ btl_openib_component_init(int *num_btl_modules,
opal_show_help("help-mpi-btl-openib.txt", opal_show_help("help-mpi-btl-openib.txt",
"ibv_fork_init fail", true, "ibv_fork_init fail", true,
orte_system_info.nodename); orte_system_info.nodename);
mca_btl_openib_component.ib_num_btls = 0; goto no_btls;
btl_openib_modex_send();
return NULL;
} }
} }
} }
#endif #endif
/* Parse the include and exclude lists, checking for errors */
mca_btl_openib_component.if_include_list =
mca_btl_openib_component.if_exclude_list =
mca_btl_openib_component.if_list = NULL;
if (NULL != mca_btl_openib_component.if_include &&
NULL != mca_btl_openib_component.if_exclude) {
opal_show_help("help-mpi-btl-openib.txt",
"specified include and exclude", true,
mca_btl_openib_component.if_include,
mca_btl_openib_component.if_exclude, NULL);
goto no_btls;
} else if (NULL != mca_btl_openib_component.if_include) {
mca_btl_openib_component.if_include_list =
opal_argv_split(mca_btl_openib_component.if_include, ',');
mca_btl_openib_component.if_list =
opal_argv_copy(mca_btl_openib_component.if_include_list);
} else if (NULL != mca_btl_openib_component.if_exclude) {
mca_btl_openib_component.if_exclude_list =
opal_argv_split(mca_btl_openib_component.if_exclude, ',');
mca_btl_openib_component.if_list =
opal_argv_copy(mca_btl_openib_component.if_exclude_list);
}
#ifdef HAVE_IBV_GET_DEVICE_LIST #ifdef HAVE_IBV_GET_DEVICE_LIST
ib_devs = ibv_get_device_list(&num_devs); ib_devs = ibv_get_device_list(&num_devs);
#else #else
@ -776,7 +809,6 @@ btl_openib_component_init(int *num_btl_modules,
OBJ_CONSTRUCT(&btl_list, opal_list_t); OBJ_CONSTRUCT(&btl_list, opal_list_t);
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_openib_component.ib_lock, opal_mutex_t);
for (i = 0; i < num_devs && for (i = 0; i < num_devs &&
(-1 == mca_btl_openib_component.ib_max_btls || (-1 == mca_btl_openib_component.ib_max_btls ||
mca_btl_openib_component.ib_num_btls < mca_btl_openib_component.ib_num_btls <
@ -790,6 +822,21 @@ btl_openib_component_init(int *num_btl_modules,
opal_show_help("help-mpi-btl-openib.txt", opal_show_help("help-mpi-btl-openib.txt",
"error in hca init", true, orte_system_info.nodename); "error in hca init", true, orte_system_info.nodename);
} }
/* If we got back from checking all the HCAs and find that there
are still items in the component.if_list, that means that they
didn't exist. Show an appropriate warning if the warning was
not disabled. */
if (0 != opal_argv_count(mca_btl_openib_component.if_list) &&
mca_btl_openib_component.warn_nonexistent_if) {
char *str = opal_argv_join(mca_btl_openib_component.if_list, ',');
opal_show_help("help-mpi-btl-openib.txt", "nonexistent port",
true, orte_system_info.nodename,
((NULL != mca_btl_openib_component.if_include) ?
"in" : "ex"), str);
free(str);
}
if(0 == mca_btl_openib_component.ib_num_btls) { if(0 == mca_btl_openib_component.ib_num_btls) {
opal_show_help("help-mpi-btl-openib.txt", opal_show_help("help-mpi-btl-openib.txt",
@ -962,7 +1009,23 @@ btl_openib_component_init(int *num_btl_modules,
#else #else
free(ib_devs); free(ib_devs);
#endif #endif
if (NULL != mca_btl_openib_component.if_include_list) {
opal_argv_free(mca_btl_openib_component.if_include_list);
mca_btl_openib_component.if_include_list = NULL;
}
if (NULL != mca_btl_openib_component.if_exclude_list) {
opal_argv_free(mca_btl_openib_component.if_exclude_list);
mca_btl_openib_component.if_exclude_list = NULL;
}
return btls; return btls;
no_btls:
/* If we fail early enough in the setup, we just modex around that
there are no openib BTL's in this process and return NULL. */
mca_btl_openib_component.ib_num_btls = 0;
btl_openib_modex_send();
return NULL;
} }
@ -1514,3 +1577,114 @@ error:
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL); openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL);
return count; return count;
} }
/**
 * Work out which ports of an HCA may be used, honoring the
 * if_include_list / if_exclude_list stored on mca_btl_openib_component.
 *
 * @param hca            HCA to inspect; hca->ib_dev and
 *                       hca->ib_dev_attr.phys_port_cnt are read.
 * @param allowed_ports  Caller-provided array with room for at least
 *                       phys_port_cnt ints.  On return its first
 *                       <return value> entries hold the 1-based numbers
 *                       of the usable ports.
 *
 * @return The number of usable ports.  0 means that no port of this HCA
 *         may be used; 0 is also returned if the scratch name buffer
 *         cannot be allocated.
 *
 * Side effect: every entry of mca_btl_openib_component.if_list that
 * names this HCA (bare device name, or "<device>:<port>" for one of its
 * ports) is deleted from that list, so whatever is left after all HCAs
 * have been examined did not match any HCA/port.
 */
static int
get_port_list(mca_btl_openib_hca_t *hca, int *allowed_ports)
{
    int i, j, k, num_ports = 0;
    const char *dev_name;
    char *name;
    size_t name_size;

    dev_name = ibv_get_device_name(hca->ib_dev);

    /* Scratch buffer for "<dev_name>:<port>".  Size it for anything
       that "%d" can print (3 characters per byte of int bounds the
       digits, plus one for a sign) so that port numbers with three or
       more digits cannot overrun it. */
    name_size = strlen(dev_name) + 1 /* ':' */
        + (3 * sizeof(int) + 1) /* digits and sign */
        + 1 /* terminating NUL */;
    name = (char*) malloc(name_size);
    if (NULL == name) {
        return 0;
    }

    /* Assume that all ports are allowed.  num_ports will be adjusted
       below to reflect whether this is true or not. */
    for (i = 1; i <= hca->ib_dev_attr.phys_port_cnt; ++i) {
        allowed_ports[num_ports++] = i;
    }
    num_ports = 0;

    if (NULL != mca_btl_openib_component.if_include_list) {
        /* If only the HCA name is given (eg. mthca0,mthca1) use all
           ports */
        i = 0;
        while (mca_btl_openib_component.if_include_list[i]) {
            if (0 == strcmp(dev_name,
                            mca_btl_openib_component.if_include_list[i])) {
                num_ports = hca->ib_dev_attr.phys_port_cnt;
                goto done;
            }
            ++i;
        }

        /* Include only requested ports on the HCA.  Overwriting
           allowed_ports in place is safe because num_ports never gets
           ahead of the port currently being examined. */
        for (i = 1; i <= hca->ib_dev_attr.phys_port_cnt; ++i) {
            snprintf(name, name_size, "%s:%d", dev_name, i);
            for (j = 0;
                 NULL != mca_btl_openib_component.if_include_list[j]; ++j) {
                if (0 == strcmp(name,
                                mca_btl_openib_component.if_include_list[j])) {
                    allowed_ports[num_ports++] = i;
                    break;
                }
            }
        }
    } else if (NULL != mca_btl_openib_component.if_exclude_list) {
        /* If only the HCA name is given (eg. mthca0,mthca1) exclude
           all ports */
        i = 0;
        while (mca_btl_openib_component.if_exclude_list[i]) {
            if (0 == strcmp(dev_name,
                            mca_btl_openib_component.if_exclude_list[i])) {
                num_ports = 0;
                goto done;
            }
            ++i;
        }

        /* Exclude the specified ports on this HCA */
        for (i = 1; i <= hca->ib_dev_attr.phys_port_cnt; ++i) {
            snprintf(name, name_size, "%s:%d", dev_name, i);
            for (j = 0;
                 NULL != mca_btl_openib_component.if_exclude_list[j]; ++j) {
                if (0 == strcmp(name,
                                mca_btl_openib_component.if_exclude_list[j])) {
                    /* If found, set a sentinel value */
                    j = -1;
                    break;
                }
            }
            /* If we didn't find it, it's ok to include in the list */
            if (-1 != j) {
                allowed_ports[num_ports++] = i;
            }
        }
    } else {
        /* Neither list was given: every port is usable */
        num_ports = hca->ib_dev_attr.phys_port_cnt;
    }

done:
    /* Remove the following from the error-checking if_list:
       - bare device name
       - device name suffixed with port number */
    if (NULL != mca_btl_openib_component.if_list) {
        for (i = 0; NULL != mca_btl_openib_component.if_list[i]; ++i) {
            /* Look for raw device name */
            if (0 == strcmp(mca_btl_openib_component.if_list[i], dev_name)) {
                j = opal_argv_count(mca_btl_openib_component.if_list);
                opal_argv_delete(&j, &(mca_btl_openib_component.if_list),
                                 i, 1);
                /* The following entries moved down by one; look at
                   this index again on the next iteration */
                --i;
            }
        }
        for (i = 1; i <= hca->ib_dev_attr.phys_port_cnt; ++i) {
            snprintf(name, name_size, "%s:%d", dev_name, i);
            for (j = 0; NULL != mca_btl_openib_component.if_list[j]; ++j) {
                if (0 == strcmp(mca_btl_openib_component.if_list[j], name)) {
                    k = opal_argv_count(mca_btl_openib_component.if_list);
                    opal_argv_delete(&k, &(mca_btl_openib_component.if_list),
                                     j, 1);
                    /* Keep scanning (re-checking this index) so that a
                       port listed more than once is removed completely,
                       just like a bare device name above; otherwise the
                       leftover duplicate would be reported as
                       nonexistent. */
                    --j;
                }
            }
        }
    }

    free(name);
    return num_ports;
}

Просмотреть файл

@ -120,6 +120,31 @@ int btl_openib_register_mca_params(void)
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)", "Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
1, &ival, 0)); 1, &ival, 0));
mca_btl_openib_component.warn_default_gid_prefix = (0 != ival); mca_btl_openib_component.warn_default_gid_prefix = (0 != ival);
CHECK(reg_int("warn_nonexistent_if",
"Warn if non-existent HCAs and/or ports are specified in the btl_openib_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_btl_openib_component.warn_nonexistent_if = (0 != ival);
#ifdef HAVE_IBV_FORK_INIT
ival2 = -1;
#else
ival2 = 0;
#endif
CHECK(reg_int("want_fork_support",
"Whether fork support is desired or not "
"(negative = try to enable fork support, but continue even if it is not available, 0 = do not enable fork support, positive = try to enable fork support and fail if it is not available)",
ival2, &ival, 0));
#ifdef HAVE_IBV_FORK_INIT
mca_btl_openib_component.want_fork_support = ival;
#else
if (0 != ival) {
opal_show_help("help-mpi-btl-openib.txt",
"ibv_fork requested but not supported", true,
orte_system_info.nodename);
return OMPI_ERROR;
}
#endif
asprintf(&str, "%s/mca-btl-openib-hca-params.ini", asprintf(&str, "%s/mca-btl-openib-hca-params.ini",
opal_install_dirs.pkgdatadir); opal_install_dirs.pkgdatadir);
if (NULL == str) { if (NULL == str) {
@ -399,5 +424,15 @@ int btl_openib_register_mca_params(void)
mca_btl_base_param_register(&mca_btl_openib_component.super.btl_version, mca_btl_base_param_register(&mca_btl_openib_component.super.btl_version,
&mca_btl_openib_module.super); &mca_btl_openib_module.super);
CHECK(reg_string("if_include",
"List of HCAs/ports to be used (eg. mthca0,mthca1:2)",
NULL, &mca_btl_openib_component.if_include,
0));
CHECK(reg_string("if_exclude",
"List of HCAs/ports to be excluded ",
NULL, &mca_btl_openib_component.if_exclude,
0));
return ret; return ret;
} }

Просмотреть файл

@ -17,7 +17,8 @@
# #
# $HEADER$ # $HEADER$
# #
# This is the US/English general help file for Open MPI. # This is the US/English help file for Open MPI's OpenFabrics support
# (the openib BTL).
# #
[ini file:file not found] [ini file:file not found]
The Open MPI OpenIB BTL component was unable to find or read an INI The Open MPI OpenIB BTL component was unable to find or read an INI
@ -26,6 +27,7 @@ parameter. Please check this file and/or modify the
btl_openib_hca_param_files MCA parameter: btl_openib_hca_param_files MCA parameter:
%s %s
#
[ini file:not in a section] [ini file:not in a section]
In parsing OpenIB BTL parameter file, values were found that were not In parsing OpenIB BTL parameter file, values were found that were not
in a valid INI section. These values will be ignored. Please in a valid INI section. These values will be ignored. Please
@ -36,6 +38,7 @@ re-check this file:
At line %d, near the following text: At line %d, near the following text:
%s %s
#
[ini file:unexpected token] [ini file:unexpected token]
In parsing OpenIB BTL parameter file, unexpected tokens were found In parsing OpenIB BTL parameter file, unexpected tokens were found
(this may cause significant portions of the INI file to be ignored). (this may cause significant portions of the INI file to be ignored).
@ -46,6 +49,7 @@ Please re-check this file:
At line %d, near the following text: At line %d, near the following text:
%s %s
#
[ini file:expected equals] [ini file:expected equals]
In parsing OpenIB BTL parameter file, unexpected tokens were found In parsing OpenIB BTL parameter file, unexpected tokens were found
(this may cause significant portions of the INI file to be ignored). (this may cause significant portions of the INI file to be ignored).
@ -57,6 +61,7 @@ this file:
At line %d, near the following text: At line %d, near the following text:
%s %s
#
[ini file:expected newline] [ini file:expected newline]
In parsing OpenIB BTL parameter file, unexpected tokens were found In parsing OpenIB BTL parameter file, unexpected tokens were found
(this may cause significant portions of the INI file to be ignored). (this may cause significant portions of the INI file to be ignored).
@ -67,6 +72,7 @@ A newline was expected but was not found. Please re-check this file:
At line %d, near the following text: At line %d, near the following text:
%s %s
#
[ini file:unknown field] [ini file:unknown field]
In parsing OpenIB BTL parameter file, an unrecognized field name was In parsing OpenIB BTL parameter file, an unrecognized field name was
found. Please re-check this file: found. Please re-check this file:
@ -78,6 +84,7 @@ At line %d, the field named:
%s %s
This field, and any other unrecognized fields, will be skipped. This field, and any other unrecognized fields, will be skipped.
#
[no hca params found] [no hca params found]
WARNING: No HCA parameters were found for the HCA that Open MPI WARNING: No HCA parameters were found for the HCA that Open MPI
detected: detected:
@ -92,6 +99,7 @@ btl_openib_hca_param_files MCA parameter to set values for your HCA.
NOTE: You can turn off this warning by setting the MCA parameter NOTE: You can turn off this warning by setting the MCA parameter
btl_openib_warn_no_hca_params_found to 0. btl_openib_warn_no_hca_params_found to 0.
#
[init-fail-no-mem] [init-fail-no-mem]
The OpenIB BTL failed to initialize while trying to allocate some The OpenIB BTL failed to initialize while trying to allocate some
locked memory. This typically can indicate that the memlock limits locked memory. This typically can indicate that the memlock limits
@ -109,6 +117,7 @@ problem fixed. This FAQ entry on the Open MPI web site may also be
helpful: helpful:
http://www.open-mpi.org/faq/?category=openfabrics#ib-locked-pages http://www.open-mpi.org/faq/?category=openfabrics#ib-locked-pages
#
[init-fail-create-q] [init-fail-create-q]
The OpenIB BTL failed to initialize while trying to create an internal The OpenIB BTL failed to initialize while trying to create an internal
queue. This typically indicates a failed OpenFabrics installation or queue. This typically indicates a failed OpenFabrics installation or
@ -122,6 +131,7 @@ faulty hardware. The failure occured here:
You may need to consult with your system administrator to get this You may need to consult with your system administrator to get this
problem fixed. problem fixed.
#
[btl_openib:retry-exceeded] [btl_openib:retry-exceeded]
The InfiniBand retry count between two MPI processes has been The InfiniBand retry count between two MPI processes has been
exceeded. "Retry count" is defined in the InfiniBand spec 1.2 exceeded. "Retry count" is defined in the InfiniBand spec 1.2
@ -148,12 +158,15 @@ respect to the retry count:
4.096 microseconds * (2^btl_openib_ib_timeout) 4.096 microseconds * (2^btl_openib_ib_timeout)
See the InfiniBand spec 1.2 (section 12.7.34) for more details. See the InfiniBand spec 1.2 (section 12.7.34) for more details.
#
[no active ports found] [no active ports found]
WARNING: There is at least on IB HCA found on host '%s', but there is WARNING: There is at least on IB HCA found on host '%s', but there is
no active ports detected. This is most certainly not what you wanted. no active ports detected. This is most certainly not what you wanted.
Check your cables and SM configuration. Check your cables and SM configuration.
#
[error in hca init] [error in hca init]
WARNING: There were errors during IB HCA initialization on host '%s'. WARNING: There were errors during IB HCA initialization on host '%s'.
#
[default subnet prefix] [default subnet prefix]
WARNING: There are more than one active ports on host '%s', but the WARNING: There are more than one active ports on host '%s', but the
default subnet GID prefix was detected on more than one of these default subnet GID prefix was detected on more than one of these
@ -169,16 +182,39 @@ Please see this FAQ entry for more details:
NOTE: You can turn off this warning by setting the MCA parameter NOTE: You can turn off this warning by setting the MCA parameter
btl_openib_warn_default_gid_prefix to 0. btl_openib_warn_default_gid_prefix to 0.
#
[wrong buffer alignment] [wrong buffer alignment]
Wrong buffer alignment %d configured on host '%s'. Should be bigger Wrong buffer alignment %d configured on host '%s'. Should be bigger
than zero and power of two. Use default %d instead. than zero and power of two. Use default %d instead.
#
[ibv_fork requested but not supported] [ibv_fork requested but not supported]
WARNING: fork() support was requested for the openib BTL, but it is WARNING: fork() support was requested for the openib BTL, but it is
not supported on the host %s. Deactivating the openib BTL. not supported on the host %s. Deactivating the openib BTL.
#
[ibv_fork_init fail] [ibv_fork_init fail]
WARNING: fork() support was requested for the openib BTL, but the WARNING: fork() support was requested for the openib BTL, but the
library call ibv_fork_init() failed on the host %s. library call ibv_fork_init() failed on the host %s.
Deactivating the openib BTL. Deactivating the openib BTL.
#
[wrong buffer alignment] [wrong buffer alignment]
Wrong buffer alignment %d configured on host '%s'. Should be bigger Wrong buffer alignment %d configured on host '%s'. Should be bigger
than zero and power of two. Use default %d instead. than zero and power of two. Use default %d instead.
#
[specified include and exclude]
ERROR: You have specified both the btl_openib_if_include and
btl_openib_if_exclude MCA parameters. These two parameters are
mutually exclusive; you can only specify one or the other.
For reference, the values that you specified are:
btl_openib_if_include: %s
btl_openib_if_exclude: %s
[nonexistent port]
WARNING: One or more nonexistent HCAs/ports were specified:
Host: %s
MCA parameter: btl_openib_if_%sclude
Nonexistent entities: %s
These entities will be ignored. You can disable this warning by
setting the btl_openib_warn_nonexistent_if MCA parameter to 0.