More updates to common verbs infrastructure:
* Moved "check basics" sanity check from openib BTL to common/verbs (which also allows us to have openib ''not'' include <infiniband/driver.h>, which is a Very Good Thing) * Add new ompi_common_verbs_qp_test() function, which tests to see whether a device supports RC and/or UD QPs. The openib BTL now uses this function to ensure that the device supports RC QPs. * Rename ompi_common_verbs_find_ibv_ports() to be ompi_common_verbs_find_ports() -- the "ibv" was redundant. * Re-work ompi_common_verbs_find_ports() to use ompi_common_verbs_qp_test() instead of testing for RC/UD QPs itself * Add bunches of opal_output_verbose() to the find_ports() routine (to help diagnose connectivity problems -- imagine running with --mca btl_base_verbose 10; you'll see all the find_ports() test results) * Make ompi_common_verbs_find_ports() warn if devices/ports are supplied in the if_include/if_exclude strings that do not exist (quite similar to what the openib BTL does today). * Add ompi_common_verbs_mca_register() function, which registers common verbs MCA params. It will also register MCA param synonyms for these MCA params to upper-level components (e.g., btl_<upper-level-component>_<the-mca-param>). * common_verbs_warn_nonexistent_if: warn if if_include/if_exclude-specified devices or ports do not exist. This commit was SVN r27332.
Этот коммит содержится в:
родитель
1e7b958c2a
Коммит
3cc8b0461a
@ -28,17 +28,6 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <infiniband/verbs.h>
|
||||
/* This is crummy, but <infiniband/driver.h> doesn't work on all
|
||||
platforms with all compilers. Specifically, trying to include it
|
||||
on RHEL4U3 with the PGI 32 bit compiler will cause problems because
|
||||
certain 64 bit types are not defined. Per advice from Roland D.,
|
||||
just include the one prototype that we need in this case
|
||||
(ibv_get_sysfs_path()). */
|
||||
#ifdef HAVE_INFINIBAND_DRIVER_H
|
||||
#include <infiniband/driver.h>
|
||||
#else
|
||||
const char *ibv_get_sysfs_path(void);
|
||||
#endif
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
@ -284,29 +273,6 @@ static int btl_openib_component_close(void)
|
||||
return rc;
|
||||
}
|
||||
|
||||
static bool check_basics(void)
|
||||
{
|
||||
int rc;
|
||||
char *file;
|
||||
struct stat s;
|
||||
|
||||
#if defined(__linux__)
|
||||
/* Check to see if $sysfsdir/class/infiniband/ exists */
|
||||
asprintf(&file, "%s/class/infiniband", ibv_get_sysfs_path());
|
||||
if (NULL == file) {
|
||||
return false;
|
||||
}
|
||||
rc = stat(file, &s);
|
||||
free(file);
|
||||
if (0 != rc || !S_ISDIR(s.st_mode)) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* It exists and is a directory -- good enough */
|
||||
return true;
|
||||
}
|
||||
|
||||
static void inline pack8(char **dest, uint8_t value)
|
||||
{
|
||||
/* Copy one character */
|
||||
@ -1628,10 +1594,27 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
ompi_btl_openib_ini_values_t values, default_values;
|
||||
int *allowed_ports = NULL;
|
||||
bool need_search;
|
||||
struct ibv_context *dev_context = NULL;
|
||||
|
||||
/* Open up the device */
|
||||
dev_context = ibv_open_device(ib_dev);
|
||||
if (NULL == dev_context) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Find out if this device supports RC QPs */
|
||||
if (OMPI_SUCCESS != ompi_common_verbs_qp_test(dev_context,
|
||||
OMPI_COMMON_VERBS_FLAGS_RC)) {
|
||||
ibv_close_device(dev_context);
|
||||
BTL_VERBOSE(("openib: RC QPs not supported -- skipping %s",
|
||||
ibv_get_device_name(ib_dev)));
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
device = OBJ_NEW(mca_btl_openib_device_t);
|
||||
if(NULL == device){
|
||||
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
|
||||
ibv_close_device(dev_context);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -1640,7 +1623,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
device->mem_reg_max = 1ull << 48;
|
||||
|
||||
device->ib_dev = ib_dev;
|
||||
device->ib_dev_context = ibv_open_device(ib_dev);
|
||||
device->ib_dev_context = dev_context;
|
||||
device->ib_pd = NULL;
|
||||
device->device_btls = OBJ_NEW(opal_pointer_array_t);
|
||||
if (OPAL_SUCCESS != opal_pointer_array_init(device->device_btls, 2, INT_MAX, 2)) {
|
||||
@ -2539,7 +2522,7 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
assume that the RDMA hardware drivers are not loaded, and
|
||||
therefore we don't want OpenFabrics verbs support in this OMPI
|
||||
job. No need to print a warning. */
|
||||
if (!check_basics()) {
|
||||
if (!ompi_common_verbs_check_basics()) {
|
||||
goto no_btls;
|
||||
}
|
||||
|
||||
|
@ -15,9 +15,12 @@ headers = \
|
||||
common_verbs.h
|
||||
|
||||
sources = \
|
||||
common_verbs_basics.c \
|
||||
common_verbs_devlist.c \
|
||||
common_verbs_find_ports.c \
|
||||
common_verbs_port.c
|
||||
common_verbs_mca.c \
|
||||
common_verbs_port.c \
|
||||
common_verbs_qp_type.c
|
||||
|
||||
dist_pkgdata_DATA = \
|
||||
help-ompi-common-verbs.txt
|
||||
|
@ -15,6 +15,11 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <infiniband/verbs.h>
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
|
||||
#include <infiniband/verbs.h>
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
@ -27,6 +32,17 @@ BEGIN_C_DECLS
|
||||
OMPI_DECLSPEC struct ibv_device **ompi_ibv_get_device_list(int *num_devs);
|
||||
OMPI_DECLSPEC void ompi_ibv_free_device_list(struct ibv_device **ib_devs);
|
||||
|
||||
/*
|
||||
* common_verbs_mca.c
|
||||
*/
|
||||
extern bool ompi_common_verbs_warn_nonexistent_if;
|
||||
OMPI_DECLSPEC void ompi_common_verbs_mca_register(mca_base_component_t *component);
|
||||
|
||||
/*
|
||||
* common_verbs_basics.c
|
||||
*/
|
||||
bool ompi_common_verbs_check_basics(void);
|
||||
|
||||
/*
|
||||
* common_verbs_find_ports.c
|
||||
*/
|
||||
@ -63,16 +79,21 @@ enum {
|
||||
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IB = 0x4,
|
||||
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IWARP = 0x8,
|
||||
/* Note that these 2 link layer flags will only be useful if
|
||||
OMPI_HAVE_IBV_LINK_LAYER is set to 1. Otherwise, they will be
|
||||
defined(HAVE_IBV_LINK_LAYER_ETHERNET). Otherwise, they will be
|
||||
ignored. */
|
||||
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB = 0x10,
|
||||
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET = 0x20,
|
||||
OMPI_COMMON_VERBS_FLAGS_MAX
|
||||
};
|
||||
|
||||
/*
|
||||
/**
|
||||
* Find a list of ibv_device ports that match a specific criteria.
|
||||
*
|
||||
* @param if_include (IN): comma-delimited list of interfaces to use
|
||||
* @param if_exclude (IN): comma-delimited list of interfaces to NOT use
|
||||
* @param flags (IN): bit flags
|
||||
* @param verbose_stream (IN): stream to send opal_output_verbose messages to
|
||||
*
|
||||
* The ports will adhere to the if_include / if_exclude lists (only
|
||||
* one can be specified). The lists are comma-delimited tokens in one
|
||||
* of two forms:
|
||||
@ -86,6 +107,9 @@ enum {
|
||||
* it includes any of the capabilities/characteristics listed in the
|
||||
* flags.
|
||||
*
|
||||
* Note that if the verbose_stream is >=0, output will be sent to that
|
||||
* stream with a verbose level of 5.
|
||||
*
|
||||
* A valid list will always be returned. It will contain zero or more
|
||||
* ompi_common_verbs_port_item_t items. Each item can be individually
|
||||
* OBJ_RELEASE'd; the destructor will take care of cleaning up the
|
||||
@ -93,9 +117,10 @@ enum {
|
||||
* port_items referring to it have been freed).
|
||||
*/
|
||||
OMPI_DECLSPEC opal_list_t *
|
||||
ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
const char *if_exclude,
|
||||
int flags);
|
||||
ompi_common_verbs_find_ports(const char *if_include,
|
||||
const char *if_exclude,
|
||||
int flags,
|
||||
int verbose_stream);
|
||||
|
||||
/*
|
||||
* Trivial function to compute the bandwidth on an ibv_port.
|
||||
@ -115,6 +140,18 @@ ompi_common_verbs_port_bw(struct ibv_port_attr *port_attr,
|
||||
OMPI_DECLSPEC int
|
||||
ompi_common_verbs_mtu(struct ibv_port_attr *port_attr);
|
||||
|
||||
/*
|
||||
* Test a device to see if it can handle a specific QP type (RC and/or
|
||||
* UD). Will return the logical AND if multiple types are specified
|
||||
* (e.g., if (RC|UD) are in flags, then will return OMPI_SUCCESS only
|
||||
* if *both* types can be created on the device).
|
||||
*
|
||||
* Flags can be the logical OR of OMPI_COMMON_VERBS_FLAGS_RC and/or
|
||||
* OMPI_COMMON_VERBS_FLAGS_UD. All other values are ignored.
|
||||
*/
|
||||
OMPI_DECLSPEC int ompi_common_verbs_qp_test(struct ibv_context *device_context,
|
||||
int flags);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
65
ompi/mca/common/verbs/common_verbs_basics.c
Обычный файл
65
ompi/mca/common/verbs/common_verbs_basics.c
Обычный файл
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/* This is crummy, but <infiniband/driver.h> doesn't work on all
|
||||
platforms with all compilers. Specifically, trying to include it
|
||||
on RHEL4U3 with the PGI 32 bit compiler will cause problems because
|
||||
certain 64 bit types are not defined. Per advice from Roland D.,
|
||||
just include the one prototype that we need in this case
|
||||
(ibv_get_sysfs_path()). */
|
||||
#include <infiniband/verbs.h>
|
||||
#ifdef HAVE_INFINIBAND_DRIVER_H
|
||||
#include <infiniband/driver.h>
|
||||
#else
|
||||
const char *ibv_get_sysfs_path(void);
|
||||
#endif
|
||||
|
||||
#include "ompi/constants.h"
|
||||
|
||||
#include "common_verbs.h"
|
||||
|
||||
/***********************************************************************/
|
||||
|
||||
/*
 * Quick sanity check for whether OpenFabrics verbs devices can be
 * present on this host.
 *
 * On Linux, this builds "<sysfs path>/class/infiniband" (the sysfs
 * path comes from ibv_get_sysfs_path()) and returns true only if that
 * path exists and is a directory.  On every other platform no check is
 * performed and true is returned.
 *
 * Returns false if the path string could not be built, if stat() fails
 * on it, or if it is not a directory.
 */
bool ompi_common_verbs_check_basics(void)
{
#if defined(__linux__)
    int rc;
    char *file = NULL;
    struct stat s;

    /* Check to see if $sysfsdir/class/infiniband/ exists.  When
       asprintf() fails it returns a negative value and the contents
       of "file" are undefined, so the return value (not just the
       pointer) must be tested. */
    if (asprintf(&file, "%s/class/infiniband", ibv_get_sysfs_path()) < 0 ||
        NULL == file) {
        return false;
    }
    rc = stat(file, &s);
    free(file);
    if (0 != rc || !S_ISDIR(s.st_mode)) {
        return false;
    }
#endif

    /* Either the directory exists, or this platform has no such
       check -- good enough */
    return true;
}
|
||||
|
@ -28,7 +28,10 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <infiniband/verbs.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/types.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
@ -157,46 +160,72 @@ static bool want_this_port(char **include_list, char **exclude_list,
|
||||
/* Will never get here */
|
||||
}
|
||||
|
||||
/***********************************************************************/
|
||||
|
||||
/*
|
||||
* It seems you can't probe a device / port to see if it supports a
|
||||
* specific type of QP. You just have to try to make it and see if it
|
||||
* works. This is a short helper function to try to make a QP of a
|
||||
* specific type and return whether it worked.
|
||||
*/
|
||||
static bool make_qp(struct ibv_pd *pd, struct ibv_cq *cq,
|
||||
enum ibv_qp_type type)
|
||||
/*
 * Map an ibv transport type to a short, human-readable name for use in
 * verbose diagnostics.  Any value other than IBV_TRANSPORT_IB or
 * IBV_TRANSPORT_IWARP is reported as "unknown".
 */
static const char *transport_name_to_str(enum ibv_transport_type transport_type)
|
||||
{
|
||||
struct ibv_qp_init_attr qpia;
|
||||
struct ibv_qp *qp;
|
||||
switch(transport_type) {
|
||||
case IBV_TRANSPORT_IB: return "IB";
|
||||
case IBV_TRANSPORT_IWARP: return "IWARP";
|
||||
case IBV_TRANSPORT_UNKNOWN:
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
qpia.qp_context = NULL;
|
||||
qpia.send_cq = cq;
|
||||
qpia.recv_cq = cq;
|
||||
qpia.srq = NULL;
|
||||
qpia.cap.max_send_wr = 1;
|
||||
qpia.cap.max_recv_wr = 1;
|
||||
qpia.cap.max_send_sge = 1;
|
||||
qpia.cap.max_recv_sge = 1;
|
||||
qpia.cap.max_inline_data = 0;
|
||||
qpia.qp_type = type;
|
||||
qpia.sq_sig_all = 0;
|
||||
|
||||
qp = ibv_create_qp(pd, &qpia);
|
||||
if (NULL != qp) {
|
||||
ibv_destroy_qp(qp);
|
||||
return true;
|
||||
#if defined(HAVE_IBV_LINK_LAYER_ETHERNET)
|
||||
static const char *link_layer_to_str(int link_type)
|
||||
{
|
||||
switch(link_type) {
|
||||
case IBV_LINK_LAYER_INFINIBAND: return "IB";
|
||||
case IBV_LINK_LAYER_ETHERNET: return "IWARP";
|
||||
case IBV_LINK_LAYER_UNSPECIFIED:
|
||||
default: return "unspecified";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/***********************************************************************/
|
||||
|
||||
/*
 * Strike a device, or a device:port pair, off the include/exclude
 * sanity list.
 *
 * @param if_sanity_list (IN/OUT): address of the argv-style,
 *        NULL-terminated list of names still unaccounted for.  May be
 *        NULL, or point to a NULL list, in which case nothing happens.
 * @param dev_name (IN): device name to look for
 * @param port (IN): port number; if > 0 the entry searched for is
 *        "dev_name:port", otherwise it is just "dev_name"
 *
 * Every entry that exactly matches is deleted from the list.  Whatever
 * is left on the list after all devices/ports have been visited names
 * things that do not exist.
 */
static void check_sanity(char ***if_sanity_list, const char *dev_name, int port)
{
    int i;
    char tmp[BUFSIZ], **list;
    const char *compare;

    /* Validate the handle *before* dereferencing it */
    if (NULL == if_sanity_list || NULL == *if_sanity_list) {
        return;
    }
    list = *if_sanity_list;

    /* A match is found if:
       - "dev_name" is in the list and port <= 0, or
       - "dev_name:port" is in the list
       If a match is found, remove that entry from the list. */
    memset(tmp, 0, sizeof(tmp));
    if (port > 0) {
        snprintf(tmp, sizeof(tmp) - 1, "%s:%d", dev_name, port);
        compare = tmp;
    } else {
        compare = dev_name;
    }

    for (i = 0; NULL != list[i]; ++i) {
        if (0 == strcmp(list[i], compare)) {
            int count = opal_argv_count(list);
            opal_argv_delete(&count, &list, i, 1);

            /* opal_argv_delete() is handed the address of our local
               pointer and may therefore replace it; publish the
               current value so the caller never keeps a stale
               pointer. */
            *if_sanity_list = list;
            if (NULL == list) {
                /* Defensive: nothing left to scan */
                break;
            }

            /* The following entries shifted down by one; re-examine
               this index on the next iteration */
            --i;
        }
    }
}
|
||||
|
||||
/***********************************************************************/
|
||||
|
||||
/*
|
||||
* Find a list of ibv_ports matching a set of criteria.
|
||||
*/
|
||||
opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
const char *if_exclude,
|
||||
int flags)
|
||||
opal_list_t *ompi_common_verbs_find_ports(const char *if_include,
|
||||
const char *if_exclude,
|
||||
int flags,
|
||||
int stream)
|
||||
{
|
||||
int32_t num_devs;
|
||||
struct ibv_device **devices;
|
||||
@ -204,9 +233,7 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
struct ibv_context *device_context;
|
||||
struct ibv_device_attr device_attr;
|
||||
struct ibv_port_attr port_attr;
|
||||
struct ibv_pd *pd = NULL;
|
||||
struct ibv_cq *cq = NULL;
|
||||
char **if_include_list = NULL, **if_exclude_list = NULL;
|
||||
char **if_include_list = NULL, **if_exclude_list = NULL, **if_sanity_list = NULL;
|
||||
ompi_common_verbs_device_item_t *di;
|
||||
ompi_common_verbs_port_item_t *pi;
|
||||
uint32_t i, j;
|
||||
@ -224,9 +251,15 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
if (NULL != if_include && NULL != if_exclude) {
|
||||
return port_list;
|
||||
} else if (NULL != if_include) {
|
||||
opal_output_verbose(5, stream, "finding verbs interfaces, including %s",
|
||||
if_include);
|
||||
if_include_list = opal_argv_split(if_include, ',');
|
||||
if_sanity_list = opal_argv_copy(if_include_list);
|
||||
} else if (NULL != if_exclude) {
|
||||
opal_output_verbose(5, stream, "finding verbs interfaces, excluding %s",
|
||||
if_exclude);
|
||||
if_exclude_list = opal_argv_split(if_exclude, ',');
|
||||
if_sanity_list = opal_argv_copy(if_exclude_list);
|
||||
}
|
||||
|
||||
/* Query all the IBV devices on the machine. Use an ompi
|
||||
@ -234,15 +267,27 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
over the history of the IBV API. */
|
||||
devices = ompi_ibv_get_device_list(&num_devs);
|
||||
if (0 == num_devs) {
|
||||
opal_output_verbose(5, stream, "no verbs interfaces found");
|
||||
goto err_free_argv;
|
||||
} else {
|
||||
opal_output_verbose(5, stream, "found %d verbs interface%s",
|
||||
num_devs, (num_devs != 1) ? "s" : "");
|
||||
}
|
||||
|
||||
/* Now loop through all the devices. Get the attributes for each
|
||||
port on each device to see if they match our selection
|
||||
criteria. */
|
||||
for (i = 0; (int32_t) i < num_devs; ++i) {
|
||||
/* See if this device is on the include/exclude sanity check
|
||||
list. If it is, remove it from the sanity check list
|
||||
(i.e., we should end up with an empty list at the end if
|
||||
all entries in the sanity check list exist) */
|
||||
device = devices[i];
|
||||
check_sanity(&if_sanity_list, ibv_get_device_name(device), -1);
|
||||
|
||||
device_context = ibv_open_device(device);
|
||||
opal_output_verbose(5, stream, "examining verbs interface: %s",
|
||||
ibv_get_device_name(device));
|
||||
if (NULL == device_context) {
|
||||
orte_show_help("help-ompi-common-verbs.txt",
|
||||
"ibv_open_device fail", true,
|
||||
@ -261,38 +306,58 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
goto err_free_port_list;
|
||||
}
|
||||
|
||||
/* Now that we have the attributes of this device, remove all
|
||||
ports of this device from the sanity check list. Note that
|
||||
IBV ports are indexed from 1, not 0. */
|
||||
for (j = 1; j <= device_attr.phys_port_cnt; j++) {
|
||||
check_sanity(&if_sanity_list, ibv_get_device_name(device), j);
|
||||
}
|
||||
|
||||
/* Check the device-specific flags to see if we want this
|
||||
device */
|
||||
want = true;
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IB &&
|
||||
IBV_TRANSPORT_IB != device->transport_type) {
|
||||
opal_output_verbose(5, stream, "verbs interface %s has wrong type (has %s, want IB)",
|
||||
ibv_get_device_name(device),
|
||||
transport_name_to_str(device->transport_type));
|
||||
want = false;
|
||||
}
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IWARP &&
|
||||
IBV_TRANSPORT_IWARP != device->transport_type) {
|
||||
opal_output_verbose(5, stream, "verbs interface %s has wrong type (has %s, want IWARP)",
|
||||
ibv_get_device_name(device),
|
||||
transport_name_to_str(device->transport_type));
|
||||
want = false;
|
||||
}
|
||||
|
||||
/* Check for RC or UD QP support */
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_RC ||
|
||||
flags & OMPI_COMMON_VERBS_FLAGS_UD) {
|
||||
if (OPAL_SUCCESS != ompi_common_verbs_qp_test(device_context, flags)) {
|
||||
want = false;
|
||||
opal_output_verbose(5, stream,
|
||||
"verbs interface %s:%d: failed to make %s QP",
|
||||
ibv_get_device_name(device), j,
|
||||
((flags & (OMPI_COMMON_VERBS_FLAGS_RC |
|
||||
OMPI_COMMON_VERBS_FLAGS_UD)) ==
|
||||
(OMPI_COMMON_VERBS_FLAGS_RC |
|
||||
OMPI_COMMON_VERBS_FLAGS_UD)) ?
|
||||
"both UD and RC" :
|
||||
(flags & OMPI_COMMON_VERBS_FLAGS_RC) ?
|
||||
"RC" : "UD");
|
||||
}
|
||||
}
|
||||
|
||||
/* If we didn't want it, go to the next device */
|
||||
if (!want) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If we asked for check for RC or UD support, then we'll need
|
||||
a PD and CQ for checking, below. So alloc one. */
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_RC ||
|
||||
flags & OMPI_COMMON_VERBS_FLAGS_UD) {
|
||||
pd = ibv_alloc_pd(device_context);
|
||||
cq = ibv_create_cq(device_context, 1, NULL, NULL, 0);
|
||||
if (NULL == cq || NULL == pd) {
|
||||
goto err_destroy_cq_pd;
|
||||
}
|
||||
}
|
||||
|
||||
/* Make a device_item_t to hold the device information */
|
||||
di = OBJ_NEW(ompi_common_verbs_device_item_t);
|
||||
if (NULL == di) {
|
||||
goto err_destroy_cq_pd;
|
||||
goto err_free_port_list;
|
||||
}
|
||||
di->device = device;
|
||||
di->context = device_context;
|
||||
@ -305,6 +370,8 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
/* If we don't want this port (based on if_include /
|
||||
if_exclude lists), skip it */
|
||||
if (!want_this_port(if_include_list, if_exclude_list, di, j)) {
|
||||
opal_output_verbose(5, stream, "verbs interface %s:%d: rejected by include/exclude",
|
||||
ibv_get_device_name(device), j);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -315,11 +382,13 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
orte_process_info.nodename,
|
||||
ibv_get_device_name(device),
|
||||
errno, strerror(errno));
|
||||
goto err_destroy_cq_pd;
|
||||
goto err_free_port_list;
|
||||
}
|
||||
|
||||
/* We definitely only want ACTIVE ports */
|
||||
if (IBV_PORT_ACTIVE != port_attr.state) {
|
||||
opal_output_verbose(5, stream, "verbs interface %s:%d: not ACTIVE",
|
||||
ibv_get_device_name(device), j);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -329,30 +398,35 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
if (0 == flags) {
|
||||
want = true;
|
||||
}
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_RC) {
|
||||
/* It doesn't look like you can query whether a
|
||||
device/port supports RC QP's. You just have to try
|
||||
to make one. :-( If it succeeds, the device/port
|
||||
supports it. */
|
||||
if (make_qp(pd, cq, IBV_QPT_RC)) {
|
||||
want = true;
|
||||
}
|
||||
}
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_UD) {
|
||||
/* See above comment about RC QP's -- same rationale holds
|
||||
true here. */
|
||||
if (make_qp(pd, cq, IBV_QPT_UD)) {
|
||||
want = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & (OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
|
||||
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) ==
|
||||
(OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
|
||||
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) {
|
||||
/* If they specified both link layers, then we want this port */
|
||||
want = true;
|
||||
} else if ((flags & (OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
|
||||
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) == 0) {
|
||||
/* If they specified neither link layer, then we want this port */
|
||||
want = true;
|
||||
}
|
||||
#if defined(HAVE_IBV_LINK_LAYER_ETHERNET)
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB &&
|
||||
IBV_LINK_LAYER_INFINIBAND == port_attr.link_layer) {
|
||||
want = true;
|
||||
}
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET &&
|
||||
IBV_LINK_LAYER_ETHERNET == port_attr.link_layer) {
|
||||
want = true;
|
||||
else if (flags & OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB) {
|
||||
if (IBV_LINK_LAYER_INFINIBAND == port_attr.link_layer) {
|
||||
want = true;
|
||||
} else {
|
||||
opal_output_verbose(5, stream, "verbs interface %s:%d has wrong link layer (has %s, want IB)",
|
||||
ibv_get_device_name(device), j,
|
||||
link_layer_to_str(port_attr.link_layer));
|
||||
}
|
||||
} else if (flags & OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET) {
|
||||
if (IBV_LINK_LAYER_ETHERNET == port_attr.link_layer) {
|
||||
want = true;
|
||||
} else {
|
||||
opal_output_verbose(5, stream, "verbs interface %s:%d has wrong link layer (has %s, want Ethernet)",
|
||||
ibv_get_device_name(device), j,
|
||||
link_layer_to_str(port_attr.link_layer));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -360,28 +434,20 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If we got this far, we want the port. Make an item for
|
||||
it and add it to the list. */
|
||||
/* If we got this far, we want the port. Make an item for it. */
|
||||
pi = OBJ_NEW(ompi_common_verbs_port_item_t);
|
||||
if (NULL == pi) {
|
||||
goto err_destroy_cq_pd;
|
||||
goto err_free_port_list;
|
||||
}
|
||||
pi->device = di;
|
||||
pi->port_num = j;
|
||||
pi->port_attr = port_attr;
|
||||
OBJ_RETAIN(di);
|
||||
|
||||
/* Add the port item to the list */
|
||||
opal_list_append(port_list, &pi->super);
|
||||
}
|
||||
|
||||
/* If we allocated a pd for testing RC/UD, free it here */
|
||||
if (NULL != pd) {
|
||||
ibv_dealloc_pd(pd);
|
||||
pd = NULL;
|
||||
}
|
||||
if (NULL != cq) {
|
||||
ibv_destroy_cq(cq);
|
||||
cq = NULL;
|
||||
opal_output_verbose(5, stream, "found acceptable verbs interface %s:%d",
|
||||
ibv_get_device_name(device), j);
|
||||
}
|
||||
|
||||
/* We're done with the device; if some ports are using it, its
|
||||
@ -390,19 +456,30 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
OBJ_RELEASE(di);
|
||||
}
|
||||
|
||||
/* Sanity check that the devices specified in the if_include /
|
||||
if_exclude lists actually existed. If this is true, then the
|
||||
sanity list will now be empty. If there are still items left
|
||||
on the list, then they didn't exist. Bad. Print a warning (if
|
||||
the warning is not disabled). */
|
||||
if (0 != opal_argv_count(if_sanity_list)) {
|
||||
if (ompi_common_verbs_warn_nonexistent_if) {
|
||||
char *str = opal_argv_join(if_sanity_list, ',');
|
||||
orte_show_help("help-ompi-common-verbs.txt", "nonexistent port",
|
||||
true, orte_process_info.nodename,
|
||||
((NULL != if_include) ? "in" : "ex"), str);
|
||||
free(str);
|
||||
|
||||
/* Only warn once per process */
|
||||
ompi_common_verbs_warn_nonexistent_if = false;
|
||||
}
|
||||
}
|
||||
if (NULL != if_sanity_list) {
|
||||
opal_argv_free(if_sanity_list);
|
||||
}
|
||||
|
||||
/* All done! */
|
||||
return port_list;
|
||||
|
||||
err_destroy_cq_pd:
|
||||
if (NULL != pd) {
|
||||
ibv_dealloc_pd(pd);
|
||||
pd = NULL;
|
||||
}
|
||||
if (NULL != cq) {
|
||||
ibv_destroy_cq(cq);
|
||||
cq = NULL;
|
||||
}
|
||||
|
||||
err_free_port_list:
|
||||
for (item = opal_list_remove_first(port_list);
|
||||
item != NULL;
|
||||
@ -411,6 +488,10 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
|
||||
}
|
||||
|
||||
err_free_argv:
|
||||
if (NULL != if_sanity_list) {
|
||||
opal_argv_free(if_sanity_list);
|
||||
if_sanity_list = NULL;
|
||||
}
|
||||
opal_argv_free(if_include_list);
|
||||
if_include_list = NULL;
|
||||
opal_argv_free(if_exclude_list);
|
||||
|
56
ompi/mca/common/verbs/common_verbs_mca.c
Обычный файл
56
ompi/mca/common/verbs/common_verbs_mca.c
Обычный файл
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "common_verbs.h"
|
||||
|
||||
/***********************************************************************/
|
||||
|
||||
static bool registered = false;
|
||||
static int warn_nonexistent_if_index = -1;
|
||||
|
||||
bool ompi_common_verbs_warn_nonexistent_if = true;
|
||||
|
||||
static void register_internal(void)
|
||||
{
|
||||
int ival;
|
||||
|
||||
warn_nonexistent_if_index =
|
||||
mca_base_param_reg_int_name("ompi_common_verbs",
|
||||
"warn_nonexistent_if",
|
||||
"Warn if non-existent devices and/or ports are specified in device include/exclude MCA parameters "
|
||||
"(0 = do not warn; any other value = warn)",
|
||||
false, false,
|
||||
(int) ompi_common_verbs_warn_nonexistent_if,
|
||||
&ival);
|
||||
ompi_common_verbs_warn_nonexistent_if = (bool) ival;
|
||||
|
||||
registered = true;
|
||||
}
|
||||
|
||||
/*
 * Register the common verbs MCA parameters (first call only) and
 * create a synonym for each of them under the given upper-level
 * component.
 *
 * @param component (IN): component on whose behalf the synonym
 *        "warn_nonexistent_if" is registered
 *
 * After the synonym is registered the parameter is looked up again and
 * ompi_common_verbs_warn_nonexistent_if is refreshed from it.
 */
void ompi_common_verbs_mca_register(mca_base_component_t *component)
{
    int ival;

    if (!registered) {
        register_internal();
    }

    /* Seed with the current setting so that a failed lookup below
       leaves the flag as it was instead of reading an uninitialized
       int. */
    ival = (int) ompi_common_verbs_warn_nonexistent_if;

    /* Make synonyms for the common_verbs MCA params.  Need to look up
       the value again, because a new/different value may have been
       set by the new synonym name. */
    mca_base_param_reg_syn(warn_nonexistent_if_index, component,
                           "warn_nonexistent_if", false);
    mca_base_param_lookup_int(warn_nonexistent_if_index, &ival);
    ompi_common_verbs_warn_nonexistent_if = (bool) ival;
}
|
99
ompi/mca/common/verbs/common_verbs_qp_type.c
Обычный файл
99
ompi/mca/common/verbs/common_verbs_qp_type.c
Обычный файл
@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <infiniband/verbs.h>
|
||||
|
||||
#include "ompi/constants.h"
|
||||
|
||||
#include "common_verbs.h"
|
||||
|
||||
/*
|
||||
* It seems you can't probe a device / port to see if it supports a
|
||||
* specific type of QP. You just have to try to make it and see if it
|
||||
* works. This is a short helper function to try to make a QP of a
|
||||
* specific type and return whether it worked.
|
||||
*/
|
||||
static bool make_qp(struct ibv_pd *pd, struct ibv_cq *cq, enum ibv_qp_type type)
|
||||
{
|
||||
struct ibv_qp_init_attr qpia;
|
||||
struct ibv_qp *qp;
|
||||
|
||||
memset(&qpia, 0, sizeof(qpia));
|
||||
qpia.qp_context = NULL;
|
||||
qpia.send_cq = cq;
|
||||
qpia.recv_cq = cq;
|
||||
qpia.srq = NULL;
|
||||
qpia.cap.max_send_wr = 1;
|
||||
qpia.cap.max_recv_wr = 1;
|
||||
qpia.cap.max_send_sge = 1;
|
||||
qpia.cap.max_recv_sge = 1;
|
||||
qpia.cap.max_inline_data = 0;
|
||||
qpia.qp_type = type;
|
||||
qpia.sq_sig_all = 0;
|
||||
|
||||
qp = ibv_create_qp(pd, &qpia);
|
||||
if (NULL != qp) {
|
||||
ibv_destroy_qp(qp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int ompi_common_verbs_qp_test(struct ibv_context *device_context, int flags)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
struct ibv_pd *pd = NULL;
|
||||
struct ibv_cq *cq = NULL;
|
||||
|
||||
/* Bozo check */
|
||||
if (NULL == device_context ||
|
||||
(0 == (flags & (OMPI_COMMON_VERBS_FLAGS_RC | OMPI_COMMON_VERBS_FLAGS_UD)))) {
|
||||
return OMPI_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Try to make both the PD and CQ */
|
||||
pd = ibv_alloc_pd(device_context);
|
||||
if (NULL == pd) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
cq = ibv_create_cq(device_context, 1, NULL, NULL, 0);
|
||||
if (NULL == cq) {
|
||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Now try to make the QP(s) of the desired type(s) */
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_RC &&
|
||||
!make_qp(pd, cq, IBV_QPT_RC)) {
|
||||
rc = OMPI_ERR_NOT_SUPPORTED;
|
||||
goto out;
|
||||
}
|
||||
if (flags & OMPI_COMMON_VERBS_FLAGS_UD &&
|
||||
!make_qp(pd, cq, IBV_QPT_UD)) {
|
||||
rc = OMPI_ERR_NOT_SUPPORTED;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
/* Free the PD and/or CQ */
|
||||
if (NULL != pd) {
|
||||
ibv_dealloc_pd(pd);
|
||||
}
|
||||
if (NULL != cq) {
|
||||
ibv_destroy_cq(cq);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
@ -34,3 +34,15 @@ job may or may not continue.
|
||||
Hostname: %s
|
||||
Device name: %s
|
||||
Error (%d): %s
|
||||
#
|
||||
[nonexistent port]
|
||||
WARNING: One or more nonexistent OpenFabrics devices/ports were
|
||||
specified:
|
||||
|
||||
Host: %s
|
||||
MCA parameter: ompi_common_verbs_%sclude
|
||||
Nonexistent entities: %s
|
||||
|
||||
These entities will be ignored. You can disable this warning by
|
||||
setting the ompi_common_verbs_warn_nonexistent_if MCA parameter to 0.
|
||||
#
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user