Bring over the functionality from the /tmp/jnysal-openib-wireup
branch: * Support btl_openib_if_include and btl_openib_if_exclude MCA parameters, similar to those supported by other BTLs. Each takes a comma-delimited list of identifiers. Identifiers can be HCA interface names (e.g., ipath0, mthca1, etc.) or an HCA interface name and port number (e.g., ipath0:1, mthca1:2, etc.). It is an error to specify both _include and _exclude. If you specify a nonexistent (or non-ACTIVE) HCA and/or port, you'll get a warning unless you disable the warning by setting the MCA parameter btl_openib_warn_nonexistent_if to 0. * Start updating to use BEGIN_C_DECLS and END_C_DECLS * A few other minor fixes that were picked up along the way. This commit was SVN r15063.
Этот коммит содержится в:
родитель
de0f1eef89
Коммит
1e18265c16
@ -44,9 +44,7 @@
|
|||||||
|
|
||||||
#include "btl_openib_frag.h"
|
#include "btl_openib_frag.h"
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
BEGIN_C_DECLS
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MCA_BTL_IB_LEAVE_PINNED 1
|
#define MCA_BTL_IB_LEAVE_PINNED 1
|
||||||
#define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
|
#define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
|
||||||
@ -129,6 +127,10 @@ struct mca_btl_openib_component_t {
|
|||||||
#if OMPI_HAVE_POSIX_THREADS
|
#if OMPI_HAVE_POSIX_THREADS
|
||||||
int32_t fatal_counter; /**< Counts number on fatal events that we got on all hcas */
|
int32_t fatal_counter; /**< Counts number on fatal events that we got on all hcas */
|
||||||
#endif
|
#endif
|
||||||
|
char *if_include;
|
||||||
|
char **if_include_list;
|
||||||
|
char *if_exclude;
|
||||||
|
char **if_exclude_list;
|
||||||
|
|
||||||
/** Colon-delimited list of filenames for HCA parameters */
|
/** Colon-delimited list of filenames for HCA parameters */
|
||||||
char *hca_params_file_names;
|
char *hca_params_file_names;
|
||||||
@ -142,6 +144,13 @@ struct mca_btl_openib_component_t {
|
|||||||
/** Whether we want a warning if non default GID prefix is not configured
|
/** Whether we want a warning if non default GID prefix is not configured
|
||||||
on multiport setup */
|
on multiport setup */
|
||||||
bool warn_default_gid_prefix;
|
bool warn_default_gid_prefix;
|
||||||
|
/** Whether we want a warning if the user specifies a non-existent
|
||||||
|
HCA and/or port via btl_openib_if_[in|ex]clude MCA params */
|
||||||
|
bool warn_nonexistent_if;
|
||||||
|
/** Dummy argv-style list; a copy of names from the
|
||||||
|
if_[in|ex]clude list that we use for error checking (to ensure
|
||||||
|
that they all exist) */
|
||||||
|
char **if_list;
|
||||||
#ifdef HAVE_IBV_FORK_INIT
|
#ifdef HAVE_IBV_FORK_INIT
|
||||||
/** Whether we want fork support or not */
|
/** Whether we want fork support or not */
|
||||||
int want_fork_support;
|
int want_fork_support;
|
||||||
@ -505,7 +514,6 @@ static inline int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl,
|
|||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
END_C_DECLS
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif /* MCA_BTL_IB_H */
|
#endif /* MCA_BTL_IB_H */
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
|
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -32,6 +32,7 @@
|
|||||||
#include "ompi/mca/btl/btl.h"
|
#include "ompi/mca/btl/btl.h"
|
||||||
#include "opal/sys/timer.h"
|
#include "opal/sys/timer.h"
|
||||||
#include "opal/sys/atomic.h"
|
#include "opal/sys/atomic.h"
|
||||||
|
#include "opal/util/argv.h"
|
||||||
|
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
@ -87,6 +88,7 @@ static void btl_openib_frag_progress_pending(
|
|||||||
static int openib_reg_mr(void *reg_data, void *base, size_t size,
|
static int openib_reg_mr(void *reg_data, void *base, size_t size,
|
||||||
mca_mpool_base_registration_t *reg);
|
mca_mpool_base_registration_t *reg);
|
||||||
static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
|
static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
|
||||||
|
static int get_port_list(mca_btl_openib_hca_t *hca, int *allowed_ports);
|
||||||
#if OMPI_HAVE_POSIX_THREADS
|
#if OMPI_HAVE_POSIX_THREADS
|
||||||
void* btl_openib_async_thread(void *one_hca);
|
void* btl_openib_async_thread(void *one_hca);
|
||||||
#endif
|
#endif
|
||||||
@ -461,10 +463,11 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
|||||||
{
|
{
|
||||||
struct mca_mpool_base_resources_t mpool_resources;
|
struct mca_mpool_base_resources_t mpool_resources;
|
||||||
mca_btl_openib_hca_t *hca;
|
mca_btl_openib_hca_t *hca;
|
||||||
uint8_t i;
|
uint8_t i, k = 0;
|
||||||
int ret = -1;
|
int ret = -1, port_cnt;
|
||||||
ompi_btl_openib_ini_values_t values, default_values;
|
ompi_btl_openib_ini_values_t values, default_values;
|
||||||
|
int *allowed_ports;
|
||||||
|
|
||||||
hca = malloc(sizeof(mca_btl_openib_hca_t));
|
hca = malloc(sizeof(mca_btl_openib_hca_t));
|
||||||
if(NULL == hca){
|
if(NULL == hca){
|
||||||
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
|
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
|
||||||
@ -486,7 +489,13 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
|||||||
ibv_get_device_name(ib_dev), strerror(errno)));
|
ibv_get_device_name(ib_dev), strerror(errno)));
|
||||||
goto close_hca;
|
goto close_hca;
|
||||||
}
|
}
|
||||||
|
/* If mca_btl_if_include/exclude were specified, get usable ports */
|
||||||
|
allowed_ports = (int*) malloc(hca->ib_dev_attr.phys_port_cnt * sizeof(int));
|
||||||
|
port_cnt = get_port_list(hca, allowed_ports);
|
||||||
|
if(0 == port_cnt) {
|
||||||
|
ret = OMPI_SUCCESS;
|
||||||
|
goto close_hca;
|
||||||
|
}
|
||||||
/* Load in vendor/part-specific HCA parameters. Note that even if
|
/* Load in vendor/part-specific HCA parameters. Note that even if
|
||||||
we don't find values for this vendor/part, "values" will be set
|
we don't find values for this vendor/part, "values" will be set
|
||||||
indicating that it does not have good values */
|
indicating that it does not have good values */
|
||||||
@ -583,17 +592,16 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
|||||||
|
|
||||||
ret = OMPI_SUCCESS;
|
ret = OMPI_SUCCESS;
|
||||||
|
|
||||||
/* Note ports are 1 based hence j = 1 */
|
/* Note ports are 1 based (i >= 1) */
|
||||||
for(i = 1; i <= hca->ib_dev_attr.phys_port_cnt; i++){
|
for(k = 0; k < port_cnt; k++){
|
||||||
struct ibv_port_attr ib_port_attr;
|
struct ibv_port_attr ib_port_attr;
|
||||||
|
i = allowed_ports[k];
|
||||||
if(ibv_query_port(hca->ib_dev_context, i, &ib_port_attr)){
|
if(ibv_query_port(hca->ib_dev_context, i, &ib_port_attr)){
|
||||||
BTL_ERROR(("error getting port attributes for device %s "
|
BTL_ERROR(("error getting port attributes for device %s "
|
||||||
"port number %d errno says %s",
|
"port number %d errno says %s",
|
||||||
ibv_get_device_name(ib_dev), i, strerror(errno)));
|
ibv_get_device_name(ib_dev), i, strerror(errno)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(IBV_PORT_ACTIVE == ib_port_attr.state){
|
if(IBV_PORT_ACTIVE == ib_port_attr.state){
|
||||||
|
|
||||||
if (0 == mca_btl_openib_component.ib_pkey_val) {
|
if (0 == mca_btl_openib_component.ib_pkey_val) {
|
||||||
@ -663,6 +671,9 @@ dealloc_pd:
|
|||||||
ibv_dealloc_pd(hca->ib_pd);
|
ibv_dealloc_pd(hca->ib_pd);
|
||||||
close_hca:
|
close_hca:
|
||||||
ibv_close_device(hca->ib_dev_context);
|
ibv_close_device(hca->ib_dev_context);
|
||||||
|
if(NULL != allowed_ports) {
|
||||||
|
free(allowed_ports);
|
||||||
|
}
|
||||||
free_hca:
|
free_hca:
|
||||||
free(hca);
|
free(hca);
|
||||||
return ret;
|
return ret;
|
||||||
@ -705,7 +716,7 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
|
|
||||||
/* Read in INI files with HCA-specific parameters */
|
/* Read in INI files with HCA-specific parameters */
|
||||||
if (OMPI_SUCCESS != (ret = ompi_btl_openib_ini_init())) {
|
if (OMPI_SUCCESS != (ret = ompi_btl_openib_ini_init())) {
|
||||||
return NULL;
|
goto no_btls;
|
||||||
}
|
}
|
||||||
#if OMPI_HAVE_POSIX_THREADS
|
#if OMPI_HAVE_POSIX_THREADS
|
||||||
/* Set the fatal counter to zero */
|
/* Set the fatal counter to zero */
|
||||||
@ -724,14 +735,36 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
opal_show_help("help-mpi-btl-openib.txt",
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
"ibv_fork_init fail", true,
|
"ibv_fork_init fail", true,
|
||||||
orte_system_info.nodename);
|
orte_system_info.nodename);
|
||||||
mca_btl_openib_component.ib_num_btls = 0;
|
goto no_btls;
|
||||||
btl_openib_modex_send();
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Parse the include and exclude lists, checking for errors */
|
||||||
|
|
||||||
|
mca_btl_openib_component.if_include_list =
|
||||||
|
mca_btl_openib_component.if_exclude_list =
|
||||||
|
mca_btl_openib_component.if_list = NULL;
|
||||||
|
if (NULL != mca_btl_openib_component.if_include &&
|
||||||
|
NULL != mca_btl_openib_component.if_exclude) {
|
||||||
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
|
"specified include and exclude", true,
|
||||||
|
mca_btl_openib_component.if_include,
|
||||||
|
mca_btl_openib_component.if_exclude, NULL);
|
||||||
|
goto no_btls;
|
||||||
|
} else if (NULL != mca_btl_openib_component.if_include) {
|
||||||
|
mca_btl_openib_component.if_include_list =
|
||||||
|
opal_argv_split(mca_btl_openib_component.if_include, ',');
|
||||||
|
mca_btl_openib_component.if_list =
|
||||||
|
opal_argv_copy(mca_btl_openib_component.if_include_list);
|
||||||
|
} else if (NULL != mca_btl_openib_component.if_exclude) {
|
||||||
|
mca_btl_openib_component.if_exclude_list =
|
||||||
|
opal_argv_split(mca_btl_openib_component.if_exclude, ',');
|
||||||
|
mca_btl_openib_component.if_list =
|
||||||
|
opal_argv_copy(mca_btl_openib_component.if_exclude_list);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef HAVE_IBV_GET_DEVICE_LIST
|
#ifdef HAVE_IBV_GET_DEVICE_LIST
|
||||||
ib_devs = ibv_get_device_list(&num_devs);
|
ib_devs = ibv_get_device_list(&num_devs);
|
||||||
#else
|
#else
|
||||||
@ -776,7 +809,6 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
|
|
||||||
OBJ_CONSTRUCT(&btl_list, opal_list_t);
|
OBJ_CONSTRUCT(&btl_list, opal_list_t);
|
||||||
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_lock, opal_mutex_t);
|
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_lock, opal_mutex_t);
|
||||||
|
|
||||||
for (i = 0; i < num_devs &&
|
for (i = 0; i < num_devs &&
|
||||||
(-1 == mca_btl_openib_component.ib_max_btls ||
|
(-1 == mca_btl_openib_component.ib_max_btls ||
|
||||||
mca_btl_openib_component.ib_num_btls <
|
mca_btl_openib_component.ib_num_btls <
|
||||||
@ -790,6 +822,21 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
opal_show_help("help-mpi-btl-openib.txt",
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
"error in hca init", true, orte_system_info.nodename);
|
"error in hca init", true, orte_system_info.nodename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If we got back from checking all the HCAs and find that there
|
||||||
|
are still items in the component.if_list, that means that they
|
||||||
|
didn't exist. Show an appropriate warning if the warning was
|
||||||
|
not disabled. */
|
||||||
|
|
||||||
|
if (0 != opal_argv_count(mca_btl_openib_component.if_list) &&
|
||||||
|
mca_btl_openib_component.warn_nonexistent_if) {
|
||||||
|
char *str = opal_argv_join(mca_btl_openib_component.if_list, ',');
|
||||||
|
opal_show_help("help-mpi-btl-openib.txt", "nonexistent port",
|
||||||
|
true, orte_system_info.nodename,
|
||||||
|
((NULL != mca_btl_openib_component.if_include) ?
|
||||||
|
"in" : "ex"), str);
|
||||||
|
free(str);
|
||||||
|
}
|
||||||
|
|
||||||
if(0 == mca_btl_openib_component.ib_num_btls) {
|
if(0 == mca_btl_openib_component.ib_num_btls) {
|
||||||
opal_show_help("help-mpi-btl-openib.txt",
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
@ -962,7 +1009,23 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
#else
|
#else
|
||||||
free(ib_devs);
|
free(ib_devs);
|
||||||
#endif
|
#endif
|
||||||
|
if (NULL != mca_btl_openib_component.if_include_list) {
|
||||||
|
opal_argv_free(mca_btl_openib_component.if_include_list);
|
||||||
|
mca_btl_openib_component.if_include_list = NULL;
|
||||||
|
}
|
||||||
|
if (NULL != mca_btl_openib_component.if_exclude_list) {
|
||||||
|
opal_argv_free(mca_btl_openib_component.if_exclude_list);
|
||||||
|
mca_btl_openib_component.if_exclude_list = NULL;
|
||||||
|
}
|
||||||
return btls;
|
return btls;
|
||||||
|
|
||||||
|
no_btls:
|
||||||
|
/* If we fail early enough in the setup, we just modex around that
|
||||||
|
there are no openib BTL's in this process and return NULL. */
|
||||||
|
|
||||||
|
mca_btl_openib_component.ib_num_btls = 0;
|
||||||
|
btl_openib_modex_send();
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1514,3 +1577,114 @@ error:
|
|||||||
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL);
|
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL);
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
get_port_list(mca_btl_openib_hca_t *hca, int *allowed_ports)
|
||||||
|
{
|
||||||
|
int i, j, k, num_ports = 0;
|
||||||
|
const char *dev_name;
|
||||||
|
char *name;
|
||||||
|
|
||||||
|
dev_name = ibv_get_device_name(hca->ib_dev);
|
||||||
|
name = (char*) malloc(strlen(dev_name) + 4);
|
||||||
|
if (NULL == name) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assume that all ports are allowed. num_ports will be adjusted
|
||||||
|
below to reflect whether this is true or not. */
|
||||||
|
for (i = 1; i <= hca->ib_dev_attr.phys_port_cnt; ++i) {
|
||||||
|
allowed_ports[num_ports++] = i;
|
||||||
|
}
|
||||||
|
num_ports = 0;
|
||||||
|
if (NULL != mca_btl_openib_component.if_include_list) {
|
||||||
|
/* If only the HCA name is given (eg. mthca0,mthca1) use all
|
||||||
|
ports */
|
||||||
|
i = 0;
|
||||||
|
while (mca_btl_openib_component.if_include_list[i]) {
|
||||||
|
if (0 == strcmp(dev_name,
|
||||||
|
mca_btl_openib_component.if_include_list[i])) {
|
||||||
|
num_ports = hca->ib_dev_attr.phys_port_cnt;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
/* Include only requested ports on the HCA */
|
||||||
|
for (i = 1; i <= hca->ib_dev_attr.phys_port_cnt; ++i) {
|
||||||
|
sprintf(name,"%s:%d",dev_name,i);
|
||||||
|
for (j = 0;
|
||||||
|
NULL != mca_btl_openib_component.if_include_list[j]; ++j) {
|
||||||
|
if (0 == strcmp(name,
|
||||||
|
mca_btl_openib_component.if_include_list[j])) {
|
||||||
|
allowed_ports[num_ports++] = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (NULL != mca_btl_openib_component.if_exclude_list) {
|
||||||
|
/* If only the HCA name is given (eg. mthca0,mthca1) exclude
|
||||||
|
all ports */
|
||||||
|
i = 0;
|
||||||
|
while (mca_btl_openib_component.if_exclude_list[i]) {
|
||||||
|
if (0 == strcmp(dev_name,
|
||||||
|
mca_btl_openib_component.if_exclude_list[i])) {
|
||||||
|
num_ports = 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
/* Exclude the specified ports on this HCA */
|
||||||
|
for (i = 1; i <= hca->ib_dev_attr.phys_port_cnt; ++i) {
|
||||||
|
sprintf(name,"%s:%d",dev_name,i);
|
||||||
|
for (j = 0;
|
||||||
|
NULL != mca_btl_openib_component.if_exclude_list[j]; ++j) {
|
||||||
|
if (0 == strcmp(name,
|
||||||
|
mca_btl_openib_component.if_exclude_list[j])) {
|
||||||
|
/* If found, set a sentinel value */
|
||||||
|
j = -1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* If we didn't find it, it's ok to include in the list */
|
||||||
|
if (-1 != j) {
|
||||||
|
allowed_ports[num_ports++] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
num_ports = hca->ib_dev_attr.phys_port_cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
|
||||||
|
/* Remove the following from the error-checking if_list:
|
||||||
|
- bare device name
|
||||||
|
- device name suffixed with port number */
|
||||||
|
if (NULL != mca_btl_openib_component.if_list) {
|
||||||
|
for (i = 0; NULL != mca_btl_openib_component.if_list[i]; ++i) {
|
||||||
|
|
||||||
|
/* Look for raw device name */
|
||||||
|
if (0 == strcmp(mca_btl_openib_component.if_list[i], dev_name)) {
|
||||||
|
j = opal_argv_count(mca_btl_openib_component.if_list);
|
||||||
|
opal_argv_delete(&j, &(mca_btl_openib_component.if_list),
|
||||||
|
i, 1);
|
||||||
|
--i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = 1; i <= hca->ib_dev_attr.phys_port_cnt; ++i) {
|
||||||
|
sprintf(name, "%s:%d", dev_name, i);
|
||||||
|
for (j = 0; NULL != mca_btl_openib_component.if_list[j]; ++j) {
|
||||||
|
if (0 == strcmp(mca_btl_openib_component.if_list[j], name)) {
|
||||||
|
k = opal_argv_count(mca_btl_openib_component.if_list);
|
||||||
|
opal_argv_delete(&k, &(mca_btl_openib_component.if_list),
|
||||||
|
j, 1);
|
||||||
|
--j;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(name);
|
||||||
|
|
||||||
|
return num_ports;
|
||||||
|
}
|
||||||
|
@ -120,6 +120,31 @@ int btl_openib_register_mca_params(void)
|
|||||||
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
|
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
|
||||||
1, &ival, 0));
|
1, &ival, 0));
|
||||||
mca_btl_openib_component.warn_default_gid_prefix = (0 != ival);
|
mca_btl_openib_component.warn_default_gid_prefix = (0 != ival);
|
||||||
|
CHECK(reg_int("warn_nonexistent_if",
|
||||||
|
"Warn if non-existent HCAs and/or ports are specified in the btl_openib_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)",
|
||||||
|
1, &ival, 0));
|
||||||
|
mca_btl_openib_component.warn_nonexistent_if = (0 != ival);
|
||||||
|
|
||||||
|
#ifdef HAVE_IBV_FORK_INIT
|
||||||
|
ival2 = -1;
|
||||||
|
#else
|
||||||
|
ival2 = 0;
|
||||||
|
#endif
|
||||||
|
CHECK(reg_int("want_fork_support",
|
||||||
|
"Whether fork support is desired or not "
|
||||||
|
"(negative = try to enable fork support, but continue even if it is not available, 0 = do not enable fork support, positive = try to enable fork support and fail if it is not available)",
|
||||||
|
ival2, &ival, 0));
|
||||||
|
#ifdef HAVE_IBV_FORK_INIT
|
||||||
|
mca_btl_openib_component.want_fork_support = ival;
|
||||||
|
#else
|
||||||
|
if (0 != ival) {
|
||||||
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
|
"ibv_fork requested but not supported", true,
|
||||||
|
orte_system_info.nodename);
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
asprintf(&str, "%s/mca-btl-openib-hca-params.ini",
|
asprintf(&str, "%s/mca-btl-openib-hca-params.ini",
|
||||||
opal_install_dirs.pkgdatadir);
|
opal_install_dirs.pkgdatadir);
|
||||||
if (NULL == str) {
|
if (NULL == str) {
|
||||||
@ -399,5 +424,15 @@ int btl_openib_register_mca_params(void)
|
|||||||
mca_btl_base_param_register(&mca_btl_openib_component.super.btl_version,
|
mca_btl_base_param_register(&mca_btl_openib_component.super.btl_version,
|
||||||
&mca_btl_openib_module.super);
|
&mca_btl_openib_module.super);
|
||||||
|
|
||||||
|
CHECK(reg_string("if_include",
|
||||||
|
"List of HCAs/ports to be used (eg. mthca0,mthca1:2)",
|
||||||
|
NULL, &mca_btl_openib_component.if_include,
|
||||||
|
0));
|
||||||
|
|
||||||
|
CHECK(reg_string("if_exclude",
|
||||||
|
"List of HCAs/ports to be excluded ",
|
||||||
|
NULL, &mca_btl_openib_component.if_exclude,
|
||||||
|
0));
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,8 @@
|
|||||||
#
|
#
|
||||||
# $HEADER$
|
# $HEADER$
|
||||||
#
|
#
|
||||||
# This is the US/English general help file for Open MPI.
|
# This is the US/English help file for Open MPI's OpenFabrics support
|
||||||
|
# (the openib BTL).
|
||||||
#
|
#
|
||||||
[ini file:file not found]
|
[ini file:file not found]
|
||||||
The Open MPI OpenIB BTL component was unable to find or read an INI
|
The Open MPI OpenIB BTL component was unable to find or read an INI
|
||||||
@ -26,6 +27,7 @@ parameter. Please check this file and/or modify the
|
|||||||
btl_openib_hca_param_files MCA parameter:
|
btl_openib_hca_param_files MCA parameter:
|
||||||
|
|
||||||
%s
|
%s
|
||||||
|
#
|
||||||
[ini file:not in a section]
|
[ini file:not in a section]
|
||||||
In parsing OpenIB BTL parameter file, values were found that were not
|
In parsing OpenIB BTL parameter file, values were found that were not
|
||||||
in a valid INI section. These values will be ignored. Please
|
in a valid INI section. These values will be ignored. Please
|
||||||
@ -36,6 +38,7 @@ re-check this file:
|
|||||||
At line %d, near the following text:
|
At line %d, near the following text:
|
||||||
|
|
||||||
%s
|
%s
|
||||||
|
#
|
||||||
[ini file:unexpected token]
|
[ini file:unexpected token]
|
||||||
In parsing OpenIB BTL parameter file, unexpected tokens were found
|
In parsing OpenIB BTL parameter file, unexpected tokens were found
|
||||||
(this may cause significant portions of the INI file to be ignored).
|
(this may cause significant portions of the INI file to be ignored).
|
||||||
@ -46,6 +49,7 @@ Please re-check this file:
|
|||||||
At line %d, near the following text:
|
At line %d, near the following text:
|
||||||
|
|
||||||
%s
|
%s
|
||||||
|
#
|
||||||
[ini file:expected equals]
|
[ini file:expected equals]
|
||||||
In parsing OpenIB BTL parameter file, unexpected tokens were found
|
In parsing OpenIB BTL parameter file, unexpected tokens were found
|
||||||
(this may cause significant portions of the INI file to be ignored).
|
(this may cause significant portions of the INI file to be ignored).
|
||||||
@ -57,6 +61,7 @@ this file:
|
|||||||
At line %d, near the following text:
|
At line %d, near the following text:
|
||||||
|
|
||||||
%s
|
%s
|
||||||
|
#
|
||||||
[ini file:expected newline]
|
[ini file:expected newline]
|
||||||
In parsing OpenIB BTL parameter file, unexpected tokens were found
|
In parsing OpenIB BTL parameter file, unexpected tokens were found
|
||||||
(this may cause significant portions of the INI file to be ignored).
|
(this may cause significant portions of the INI file to be ignored).
|
||||||
@ -67,6 +72,7 @@ A newline was expected but was not found. Please re-check this file:
|
|||||||
At line %d, near the following text:
|
At line %d, near the following text:
|
||||||
|
|
||||||
%s
|
%s
|
||||||
|
#
|
||||||
[ini file:unknown field]
|
[ini file:unknown field]
|
||||||
In parsing OpenIB BTL parameter file, an unrecognized field name was
|
In parsing OpenIB BTL parameter file, an unrecognized field name was
|
||||||
found. Please re-check this file:
|
found. Please re-check this file:
|
||||||
@ -78,6 +84,7 @@ At line %d, the field named:
|
|||||||
%s
|
%s
|
||||||
|
|
||||||
This field, and any other unrecognized fields, will be skipped.
|
This field, and any other unrecognized fields, will be skipped.
|
||||||
|
#
|
||||||
[no hca params found]
|
[no hca params found]
|
||||||
WARNING: No HCA parameters were found for the HCA that Open MPI
|
WARNING: No HCA parameters were found for the HCA that Open MPI
|
||||||
detected:
|
detected:
|
||||||
@ -92,6 +99,7 @@ btl_openib_hca_param_files MCA parameter to set values for your HCA.
|
|||||||
|
|
||||||
NOTE: You can turn off this warning by setting the MCA parameter
|
NOTE: You can turn off this warning by setting the MCA parameter
|
||||||
btl_openib_warn_no_hca_params_found to 0.
|
btl_openib_warn_no_hca_params_found to 0.
|
||||||
|
#
|
||||||
[init-fail-no-mem]
|
[init-fail-no-mem]
|
||||||
The OpenIB BTL failed to initialize while trying to allocate some
|
The OpenIB BTL failed to initialize while trying to allocate some
|
||||||
locked memory. This typically can indicate that the memlock limits
|
locked memory. This typically can indicate that the memlock limits
|
||||||
@ -109,6 +117,7 @@ problem fixed. This FAQ entry on the Open MPI web site may also be
|
|||||||
helpful:
|
helpful:
|
||||||
|
|
||||||
http://www.open-mpi.org/faq/?category=openfabrics#ib-locked-pages
|
http://www.open-mpi.org/faq/?category=openfabrics#ib-locked-pages
|
||||||
|
#
|
||||||
[init-fail-create-q]
|
[init-fail-create-q]
|
||||||
The OpenIB BTL failed to initialize while trying to create an internal
|
The OpenIB BTL failed to initialize while trying to create an internal
|
||||||
queue. This typically indicates a failed OpenFabrics installation or
|
queue. This typically indicates a failed OpenFabrics installation or
|
||||||
@ -122,6 +131,7 @@ faulty hardware. The failure occured here:
|
|||||||
|
|
||||||
You may need to consult with your system administrator to get this
|
You may need to consult with your system administrator to get this
|
||||||
problem fixed.
|
problem fixed.
|
||||||
|
#
|
||||||
[btl_openib:retry-exceeded]
|
[btl_openib:retry-exceeded]
|
||||||
The InfiniBand retry count between two MPI processes has been
|
The InfiniBand retry count between two MPI processes has been
|
||||||
exceeded. "Retry count" is defined in the InfiniBand spec 1.2
|
exceeded. "Retry count" is defined in the InfiniBand spec 1.2
|
||||||
@ -148,12 +158,15 @@ respect to the retry count:
|
|||||||
4.096 microseconds * (2^btl_openib_ib_timeout)
|
4.096 microseconds * (2^btl_openib_ib_timeout)
|
||||||
|
|
||||||
See the InfiniBand spec 1.2 (section 12.7.34) for more details.
|
See the InfiniBand spec 1.2 (section 12.7.34) for more details.
|
||||||
|
#
|
||||||
[no active ports found]
|
[no active ports found]
|
||||||
WARNING: There is at least one IB HCA found on host '%s', but there are
|
WARNING: There is at least one IB HCA found on host '%s', but there are
|
||||||
no active ports detected. This is most certainly not what you wanted.
|
no active ports detected. This is most certainly not what you wanted.
|
||||||
Check your cables and SM configuration.
|
Check your cables and SM configuration.
|
||||||
|
#
|
||||||
[error in hca init]
|
[error in hca init]
|
||||||
WARNING: There were errors during IB HCA initialization on host '%s'.
|
WARNING: There were errors during IB HCA initialization on host '%s'.
|
||||||
|
#
|
||||||
[default subnet prefix]
|
[default subnet prefix]
|
||||||
WARNING: There is more than one active port on host '%s', but the
|
WARNING: There is more than one active port on host '%s', but the
|
||||||
default subnet GID prefix was detected on more than one of these
|
default subnet GID prefix was detected on more than one of these
|
||||||
@ -169,16 +182,39 @@ Please see this FAQ entry for more details:
|
|||||||
|
|
||||||
NOTE: You can turn off this warning by setting the MCA parameter
|
NOTE: You can turn off this warning by setting the MCA parameter
|
||||||
btl_openib_warn_default_gid_prefix to 0.
|
btl_openib_warn_default_gid_prefix to 0.
|
||||||
|
#
|
||||||
[wrong buffer alignment]
|
[wrong buffer alignment]
|
||||||
Wrong buffer alignment %d configured on host '%s'. Should be bigger
|
Wrong buffer alignment %d configured on host '%s'. Should be bigger
|
||||||
than zero and power of two. Use default %d instead.
|
than zero and power of two. Use default %d instead.
|
||||||
|
#
|
||||||
[ibv_fork requested but not supported]
|
[ibv_fork requested but not supported]
|
||||||
WARNING: fork() support was requested for the openib BTL, but it is
|
WARNING: fork() support was requested for the openib BTL, but it is
|
||||||
not supported on the host %s. Deactivating the openib BTL.
|
not supported on the host %s. Deactivating the openib BTL.
|
||||||
|
#
|
||||||
[ibv_fork_init fail]
|
[ibv_fork_init fail]
|
||||||
WARNING: fork() support was requested for the openib BTL, but the
|
WARNING: fork() support was requested for the openib BTL, but the
|
||||||
library call ibv_fork_init() failed on the host %s.
|
library call ibv_fork_init() failed on the host %s.
|
||||||
Deactivating the openib BTL.
|
Deactivating the openib BTL.
|
||||||
|
#
|
||||||
[wrong buffer alignment]
|
[wrong buffer alignment]
|
||||||
Wrong buffer alignment %d configured on host '%s'. Should be bigger
|
Wrong buffer alignment %d configured on host '%s'. Should be bigger
|
||||||
than zero and power of two. Use default %d instead.
|
than zero and power of two. Use default %d instead.
|
||||||
|
#
|
||||||
|
[specified include and exclude]
|
||||||
|
ERROR: You have specified both the btl_openib_if_include and
|
||||||
|
btl_openib_if_exclude MCA parameters. These two parameters are
|
||||||
|
mutually exclusive; you can only specify one or the other.
|
||||||
|
|
||||||
|
For reference, the values that you specified are:
|
||||||
|
|
||||||
|
btl_openib_if_include: %s
|
||||||
|
btl_openib_if_exclude: %s
|
||||||
|
[nonexistent port]
|
||||||
|
WARNING: One or more nonexistent HCAs/ports were specified:
|
||||||
|
|
||||||
|
Host: %s
|
||||||
|
MCA parameter: btl_openib_if_%sclude
|
||||||
|
Nonexistent entities: %s
|
||||||
|
|
||||||
|
These entities will be ignored. You can disable this warning by
|
||||||
|
setting the btl_openib_warn_nonexistent_if MCA parameter to 0.
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user