1
1

More updates to common verbs infrastructure:

* Moved "check basics" sanity check from openib BTL to common/verbs
   (which also allows us to have openib ''not'' include
   <infiniband/driver.h>, which is a Very Good Thing)
 * Add new ompi_common_verbs_qp_test() function, which tests to see
   whether a device supports RC and/or UD QPs.  The openib BTL now
   uses this function to ensure that the device supports RC QPs.
 * Rename ompi_common_verbs_find_ibv_ports() to be
   ompi_common_verbs_find_ports() -- the "ibv" was redundant.
 * Re-work ompi_common_verbs_find_ports() to use
   ompi_common_verbs_qp_test() instead of testing for RC/UD QPs itself
 * Add bunches of opal_output_verbose() to the find_ports() routine
   (to help diagnose connectivity problems -- imagine running with
   --mca btl_base_verbose 10; you'll see all the find_ports() test
   results)
 * Make ompi_common_verbs_find_ports() warn if devices/ports are supplied
   in the if_include/if_exclude strings that do not exist (quite
   similar to what the openib BTL does today).
 * Add ompi_common_verbs_mca_register() function, which registers
   common verbs MCA params.  It will also register MCA param synonyms
   for these MCA params to upper-level components (e.g.,
   btl_<upper-level-component>_<the-mca-param>). 
   * common_verbs_warn_nonexistent_if: warn if
     if_include/if_exclude-specified devices or ports do not exist.  

This commit was SVN r27332.
Этот коммит содержится в:
Jeff Squyres 2012-09-12 20:47:47 +00:00
родитель 1e7b958c2a
Коммит 3cc8b0461a
8 изменённых файлов: 470 добавлений и 134 удалений

Просмотреть файл

@ -28,17 +28,6 @@
#include "ompi_config.h"
#include <infiniband/verbs.h>
/* This is crummy, but <infiniband/driver.h> doesn't work on all
platforms with all compilers. Specifically, trying to include it
on RHEL4U3 with the PGI 32 bit compiler will cause problems because
certain 64 bit types are not defined. Per advice from Roland D.,
just include the one prototype that we need in this case
(ibv_get_sysfs_path()). */
#ifdef HAVE_INFINIBAND_DRIVER_H
#include <infiniband/driver.h>
#else
const char *ibv_get_sysfs_path(void);
#endif
#include <errno.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
@ -284,29 +273,6 @@ static int btl_openib_component_close(void)
return rc;
}
/*
 * Quick sanity check for whether OpenFabrics support appears to be
 * present on this machine.
 *
 * On Linux, this looks for the directory "<sysfs>/class/infiniband",
 * where <sysfs> is whatever ibv_get_sysfs_path() returns.  On every
 * other platform no check is performed at all.
 *
 * Returns false if the path string could not be built, or if the path
 * does not exist or is not a directory.  Returns true otherwise.
 */
static bool check_basics(void)
{
#if defined(__linux__)
    int rc;
    char *file = NULL;
    struct stat s;

    /* Check to see if $sysfsdir/class/infiniband/ exists.  The value
       left in "file" is not reliable when asprintf() fails, so test
       the return value instead of trusting the pointer alone. */
    if (asprintf(&file, "%s/class/infiniband", ibv_get_sysfs_path()) < 0 ||
        NULL == file) {
        return false;
    }

    rc = stat(file, &s);
    free(file);
    if (0 != rc || !S_ISDIR(s.st_mode)) {
        return false;
    }
#endif

    /* Either it exists and is a directory, or nothing was checked on
       this platform -- good enough */
    return true;
}
static void inline pack8(char **dest, uint8_t value)
{
/* Copy one character */
@ -1628,10 +1594,27 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
ompi_btl_openib_ini_values_t values, default_values;
int *allowed_ports = NULL;
bool need_search;
struct ibv_context *dev_context = NULL;
/* Open up the device */
dev_context = ibv_open_device(ib_dev);
if (NULL == dev_context) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Find out if this device supports RC QPs */
if (OMPI_SUCCESS != ompi_common_verbs_qp_test(dev_context,
OMPI_COMMON_VERBS_FLAGS_RC)) {
ibv_close_device(dev_context);
BTL_VERBOSE(("openib: RC QPs not supported -- skipping %s",
ibv_get_device_name(ib_dev)));
return OMPI_ERR_NOT_SUPPORTED;
}
device = OBJ_NEW(mca_btl_openib_device_t);
if(NULL == device){
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
ibv_close_device(dev_context);
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -1640,7 +1623,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
device->mem_reg_max = 1ull << 48;
device->ib_dev = ib_dev;
device->ib_dev_context = ibv_open_device(ib_dev);
device->ib_dev_context = dev_context;
device->ib_pd = NULL;
device->device_btls = OBJ_NEW(opal_pointer_array_t);
if (OPAL_SUCCESS != opal_pointer_array_init(device->device_btls, 2, INT_MAX, 2)) {
@ -2539,7 +2522,7 @@ btl_openib_component_init(int *num_btl_modules,
assume that the RDMA hardware drivers are not loaded, and
therefore we don't want OpenFabrics verbs support in this OMPI
job. No need to print a warning. */
if (!check_basics()) {
if (!ompi_common_verbs_check_basics()) {
goto no_btls;
}

Просмотреть файл

@ -15,9 +15,12 @@ headers = \
common_verbs.h
sources = \
common_verbs_basics.c \
common_verbs_devlist.c \
common_verbs_find_ports.c \
common_verbs_port.c
common_verbs_mca.c \
common_verbs_port.c \
common_verbs_qp_type.c
dist_pkgdata_DATA = \
help-ompi-common-verbs.txt

Просмотреть файл

@ -15,6 +15,11 @@
#include "ompi_config.h"
#include <stdint.h>
#include <infiniband/verbs.h>
#include "opal/mca/mca.h"
#include <infiniband/verbs.h>
#include "opal/class/opal_list.h"
@ -27,6 +32,17 @@ BEGIN_C_DECLS
OMPI_DECLSPEC struct ibv_device **ompi_ibv_get_device_list(int *num_devs);
OMPI_DECLSPEC void ompi_ibv_free_device_list(struct ibv_device **ib_devs);
/*
* common_verbs_mca.c
*/
extern bool ompi_common_verbs_warn_nonexistent_if;
OMPI_DECLSPEC void ompi_common_verbs_mca_register(mca_base_component_t *component);
/*
* common_verbs_basics.c
*/
bool ompi_common_verbs_check_basics(void);
/*
* common_verbs_find_ports.c
*/
@ -63,16 +79,21 @@ enum {
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IB = 0x4,
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IWARP = 0x8,
/* Note that these 2 link layer flags will only be useful if
OMPI_HAVE_IBV_LINK_LAYER is set to 1. Otherwise, they will be
defined(HAVE_IBV_LINK_LAYER_ETHERNET). Otherwise, they will be
ignored. */
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB = 0x10,
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET = 0x20,
OMPI_COMMON_VERBS_FLAGS_MAX
};
/*
/**
* Find a list of ibv_device ports that match a specific criteria.
*
* @param if_include (IN): comma-delimited list of interfaces to use
* @param if_exclude (IN): comma-delimited list of interfaces to NOT use
* @param flags (IN): bit flags
* @param verbose_stream (IN): stream to send opal_output_verbose messages to
*
* The ports will adhere to the if_include / if_exclude lists (only
* one can be specified). The lists are comma-delimited tokens in one
* of two forms:
@ -86,6 +107,9 @@ enum {
* it includes any of the capabilities/characteristics listed in the
* flags.
*
* Note that if the verbose_stream is >=0, output will be sent to that
* stream with a verbose level of 5.
*
* A valid list will always be returned. It will contain zero or more
* ompi_common_verbs_port_item_t items. Each item can be individually
* OBJ_RELEASE'd; the destructor will take care of cleaning up the
@ -93,9 +117,10 @@ enum {
* port_items referring to it have been freed).
*/
OMPI_DECLSPEC opal_list_t *
ompi_common_verbs_find_ibv_ports(const char *if_include,
const char *if_exclude,
int flags);
ompi_common_verbs_find_ports(const char *if_include,
const char *if_exclude,
int flags,
int verbose_stream);
/*
* Trivial function to compute the bandwidth on an ibv_port.
@ -115,6 +140,18 @@ ompi_common_verbs_port_bw(struct ibv_port_attr *port_attr,
OMPI_DECLSPEC int
ompi_common_verbs_mtu(struct ibv_port_attr *port_attr);
/*
* Test a device to see if it can handle a specific QP type (RC and/or
* UD). Will return the logical AND if multiple types are specified
* (e.g., if (RC|UD) are in flags, then will return OMPI_SUCCESS only
* if *both* types can be created on the device).
*
* Flags can be the logical OR of OMPI_COMMON_VERBS_FLAGS_RC and/or
* OMPI_COMMON_VERBS_FLAGS_UD. All other values are ignored.
*/
OMPI_DECLSPEC int ompi_common_verbs_qp_test(struct ibv_context *device_context,
int flags);
END_C_DECLS
#endif

65
ompi/mca/common/verbs/common_verbs_basics.c Обычный файл
Просмотреть файл

@ -0,0 +1,65 @@
/*
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
/* This is crummy, but <infiniband/driver.h> doesn't work on all
platforms with all compilers. Specifically, trying to include it
on RHEL4U3 with the PGI 32 bit compiler will cause problems because
certain 64 bit types are not defined. Per advice from Roland D.,
just include the one prototype that we need in this case
(ibv_get_sysfs_path()). */
#include <infiniband/verbs.h>
#ifdef HAVE_INFINIBAND_DRIVER_H
#include <infiniband/driver.h>
#else
const char *ibv_get_sysfs_path(void);
#endif
#include "ompi/constants.h"
#include "common_verbs.h"
/***********************************************************************/
/*
 * Quick sanity check for whether OpenFabrics support appears to be
 * present on this machine.
 *
 * On Linux, this looks for the directory "<sysfs>/class/infiniband",
 * where <sysfs> is whatever ibv_get_sysfs_path() returns.  On every
 * other platform no check is performed at all.
 *
 * Returns false if the path string could not be built, or if the path
 * does not exist or is not a directory.  Returns true otherwise.
 */
bool ompi_common_verbs_check_basics(void)
{
#if defined(__linux__)
    int rc;
    char *file = NULL;
    struct stat s;

    /* Check to see if $sysfsdir/class/infiniband/ exists.  The value
       left in "file" is not reliable when asprintf() fails, so test
       the return value instead of trusting the pointer alone. */
    if (asprintf(&file, "%s/class/infiniband", ibv_get_sysfs_path()) < 0 ||
        NULL == file) {
        return false;
    }

    rc = stat(file, &s);
    free(file);
    if (0 != rc || !S_ISDIR(s.st_mode)) {
        return false;
    }
#endif

    /* Either it exists and is a directory, or nothing was checked on
       this platform -- good enough */
    return true;
}

Просмотреть файл

@ -28,7 +28,10 @@
#include <stdio.h>
#include <string.h>
#include <infiniband/verbs.h>
#include <stdint.h>
#include "opal_stdint.h"
#include "opal/types.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/class/opal_object.h"
@ -157,46 +160,72 @@ static bool want_this_port(char **include_list, char **exclude_list,
/* Will never get here */
}
/***********************************************************************/
/*
* It seems you can't probe a device / port to see if it supports a
* specific type of QP. You just have to try to make it and see if it
* works. This is a short helper function to try to make a QP of a
* specific type and return whether it worked.
*/
static bool make_qp(struct ibv_pd *pd, struct ibv_cq *cq,
enum ibv_qp_type type)
static const char *transport_name_to_str(enum ibv_transport_type transport_type)
{
struct ibv_qp_init_attr qpia;
struct ibv_qp *qp;
switch(transport_type) {
case IBV_TRANSPORT_IB: return "IB";
case IBV_TRANSPORT_IWARP: return "IWARP";
case IBV_TRANSPORT_UNKNOWN:
default: return "unknown";
}
}
qpia.qp_context = NULL;
qpia.send_cq = cq;
qpia.recv_cq = cq;
qpia.srq = NULL;
qpia.cap.max_send_wr = 1;
qpia.cap.max_recv_wr = 1;
qpia.cap.max_send_sge = 1;
qpia.cap.max_recv_sge = 1;
qpia.cap.max_inline_data = 0;
qpia.qp_type = type;
qpia.sq_sig_all = 0;
qp = ibv_create_qp(pd, &qpia);
if (NULL != qp) {
ibv_destroy_qp(qp);
return true;
#if defined(HAVE_IBV_LINK_LAYER_ETHERNET)
/*
 * Map an IBV_LINK_LAYER_* value to a short printable name for use in
 * verbose diagnostics.  Any value that is not explicitly listed is
 * reported as "unspecified".
 *
 * Note that this names the *link layer*, so the Ethernet link layer is
 * reported as "Ethernet" (iWARP is a transport type, which is reported
 * separately from the link layer).
 */
static const char *link_layer_to_str(int link_type)
{
    switch(link_type) {
    case IBV_LINK_LAYER_INFINIBAND:   return "IB";
    case IBV_LINK_LAYER_ETHERNET:     return "Ethernet";
    case IBV_LINK_LAYER_UNSPECIFIED:
    default:                          return "unspecified";
    }
}
#endif
/***********************************************************************/
/*
 * Cross an existing device (or device:port) off of the include/exclude
 * sanity check list.
 *
 * @param if_sanity_list (IN/OUT): address of the argv-style list of
 *        names that have not been seen yet; may be NULL, or point to
 *        a NULL list, in which case nothing is done
 * @param dev_name (IN): name of the device that exists
 * @param port (IN): port number; if > 0, the string "dev_name:port" is
 *        matched, otherwise the bare "dev_name" is matched
 *
 * Every entry that exactly matches is deleted from the list.
 */
static void check_sanity(char ***if_sanity_list, const char *dev_name, int port)
{
    int i;
    char tmp[BUFSIZ];
    const char *compare;

    /* Must validate the pointer before dereferencing it */
    if (NULL == if_sanity_list || NULL == *if_sanity_list) {
        return;
    }

    /* A match is found if:
       - "dev_name" is in the list and no port was given (port <= 0), or
       - "dev_name:port" is in the list
       If a match is found, remove that entry from the list. */
    memset(tmp, 0, sizeof(tmp));
    if (port > 0) {
        snprintf(tmp, sizeof(tmp) - 1, "%s:%d", dev_name, port);
        compare = tmp;
    } else {
        compare = dev_name;
    }

    for (i = 0; NULL != (*if_sanity_list)[i]; ++i) {
        if (0 == strcmp((*if_sanity_list)[i], compare)) {
            int count = opal_argv_count(*if_sanity_list);

            /* opal_argv_delete() takes the array by address, so hand
               it the caller's pointer (not a local copy): if the
               array gets replaced, the caller must see the new one. */
            opal_argv_delete(&count, if_sanity_list, i, 1);
            if (NULL == *if_sanity_list) {
                break;
            }

            /* The next entry slid down into slot i; re-examine it */
            --i;
        }
    }
}
/***********************************************************************/
/*
* Find a list of ibv_ports matching a set of criteria.
*/
opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
const char *if_exclude,
int flags)
opal_list_t *ompi_common_verbs_find_ports(const char *if_include,
const char *if_exclude,
int flags,
int stream)
{
int32_t num_devs;
struct ibv_device **devices;
@ -204,9 +233,7 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
struct ibv_context *device_context;
struct ibv_device_attr device_attr;
struct ibv_port_attr port_attr;
struct ibv_pd *pd = NULL;
struct ibv_cq *cq = NULL;
char **if_include_list = NULL, **if_exclude_list = NULL;
char **if_include_list = NULL, **if_exclude_list = NULL, **if_sanity_list = NULL;
ompi_common_verbs_device_item_t *di;
ompi_common_verbs_port_item_t *pi;
uint32_t i, j;
@ -224,9 +251,15 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
if (NULL != if_include && NULL != if_exclude) {
return port_list;
} else if (NULL != if_include) {
opal_output_verbose(5, stream, "finding verbs interfaces, including %s",
if_include);
if_include_list = opal_argv_split(if_include, ',');
if_sanity_list = opal_argv_copy(if_include_list);
} else if (NULL != if_exclude) {
opal_output_verbose(5, stream, "finding verbs interfaces, excluding %s",
if_exclude);
if_exclude_list = opal_argv_split(if_exclude, ',');
if_sanity_list = opal_argv_copy(if_exclude_list);
}
/* Query all the IBV devices on the machine. Use an ompi
@ -234,15 +267,27 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
over the history of the IBV API. */
devices = ompi_ibv_get_device_list(&num_devs);
if (0 == num_devs) {
opal_output_verbose(5, stream, "no verbs interfaces found");
goto err_free_argv;
} else {
opal_output_verbose(5, stream, "found %d verbs interface%s",
num_devs, (num_devs != 1) ? "s" : "");
}
/* Now loop through all the devices. Get the attributes for each
port on each device to see if they match our selection
criteria. */
for (i = 0; (int32_t) i < num_devs; ++i) {
/* See if this device is on the include/exclude sanity check
list. If it is, remove it from the sanity check list
(i.e., we should end up with an empty list at the end if
all entries in the sanity check list exist) */
device = devices[i];
check_sanity(&if_sanity_list, ibv_get_device_name(device), -1);
device_context = ibv_open_device(device);
opal_output_verbose(5, stream, "examining verbs interface: %s",
ibv_get_device_name(device));
if (NULL == device_context) {
orte_show_help("help-ompi-common-verbs.txt",
"ibv_open_device fail", true,
@ -261,38 +306,58 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
goto err_free_port_list;
}
/* Now that we have the attributes of this device, remove all
ports of this device from the sanity check list. Note that
IBV ports are indexed from 1, not 0. */
for (j = 1; j <= device_attr.phys_port_cnt; j++) {
check_sanity(&if_sanity_list, ibv_get_device_name(device), j);
}
/* Check the device-specific flags to see if we want this
device */
want = true;
if (flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IB &&
IBV_TRANSPORT_IB != device->transport_type) {
opal_output_verbose(5, stream, "verbs interface %s has wrong type (has %s, want IB)",
ibv_get_device_name(device),
transport_name_to_str(device->transport_type));
want = false;
}
if (flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IWARP &&
IBV_TRANSPORT_IWARP != device->transport_type) {
opal_output_verbose(5, stream, "verbs interface %s has wrong type (has %s, want IWARP)",
ibv_get_device_name(device),
transport_name_to_str(device->transport_type));
want = false;
}
/* Check for RC or UD QP support */
if (flags & OMPI_COMMON_VERBS_FLAGS_RC ||
flags & OMPI_COMMON_VERBS_FLAGS_UD) {
if (OPAL_SUCCESS != ompi_common_verbs_qp_test(device_context, flags)) {
want = false;
opal_output_verbose(5, stream,
"verbs interface %s:%d: failed to make %s QP",
ibv_get_device_name(device), j,
((flags & (OMPI_COMMON_VERBS_FLAGS_RC |
OMPI_COMMON_VERBS_FLAGS_UD)) ==
(OMPI_COMMON_VERBS_FLAGS_RC |
OMPI_COMMON_VERBS_FLAGS_UD)) ?
"both UD and RC" :
(flags & OMPI_COMMON_VERBS_FLAGS_RC) ?
"RC" : "UD");
}
}
/* If we didn't want it, go to the next device */
if (!want) {
continue;
}
/* If we asked for check for RC or UD support, then we'll need
a PD and CQ for checking, below. So alloc one. */
if (flags & OMPI_COMMON_VERBS_FLAGS_RC ||
flags & OMPI_COMMON_VERBS_FLAGS_UD) {
pd = ibv_alloc_pd(device_context);
cq = ibv_create_cq(device_context, 1, NULL, NULL, 0);
if (NULL == cq || NULL == pd) {
goto err_destroy_cq_pd;
}
}
/* Make a device_item_t to hold the device information */
di = OBJ_NEW(ompi_common_verbs_device_item_t);
if (NULL == di) {
goto err_destroy_cq_pd;
goto err_free_port_list;
}
di->device = device;
di->context = device_context;
@ -305,6 +370,8 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
/* If we don't want this port (based on if_include /
if_exclude lists), skip it */
if (!want_this_port(if_include_list, if_exclude_list, di, j)) {
opal_output_verbose(5, stream, "verbs interface %s:%d: rejected by include/exclude",
ibv_get_device_name(device), j);
continue;
}
@ -315,11 +382,13 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
orte_process_info.nodename,
ibv_get_device_name(device),
errno, strerror(errno));
goto err_destroy_cq_pd;
goto err_free_port_list;
}
/* We definitely only want ACTIVE ports */
if (IBV_PORT_ACTIVE != port_attr.state) {
opal_output_verbose(5, stream, "verbs interface %s:%d: not ACTIVE",
ibv_get_device_name(device), j);
continue;
}
@ -329,30 +398,35 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
if (0 == flags) {
want = true;
}
if (flags & OMPI_COMMON_VERBS_FLAGS_RC) {
/* It doesn't look like you can query whether a
device/port supports RC QP's. You just have to try
to make one. :-( If it succeeds, the device/port
supports it. */
if (make_qp(pd, cq, IBV_QPT_RC)) {
want = true;
}
}
if (flags & OMPI_COMMON_VERBS_FLAGS_UD) {
/* See above comment about RC QP's -- same rationale holds
true here. */
if (make_qp(pd, cq, IBV_QPT_UD)) {
want = true;
}
}
if ((flags & (OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) ==
(OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) {
/* If they specified both link layers, then we want this port */
want = true;
} else if ((flags & (OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) == 0) {
/* If they specified neither link layer, then we want this port */
want = true;
}
#if defined(HAVE_IBV_LINK_LAYER_ETHERNET)
if (flags & OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB &&
IBV_LINK_LAYER_INFINIBAND == port_attr.link_layer) {
want = true;
}
if (flags & OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET &&
IBV_LINK_LAYER_ETHERNET == port_attr.link_layer) {
want = true;
else if (flags & OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB) {
if (IBV_LINK_LAYER_INFINIBAND == port_attr.link_layer) {
want = true;
} else {
opal_output_verbose(5, stream, "verbs interface %s:%d has wrong link layer (has %s, want IB)",
ibv_get_device_name(device), j,
link_layer_to_str(port_attr.link_layer));
}
} else if (flags & OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET) {
if (IBV_LINK_LAYER_ETHERNET == port_attr.link_layer) {
want = true;
} else {
opal_output_verbose(5, stream, "verbs interface %s:%d has wrong link layer (has %s, want Ethernet)",
ibv_get_device_name(device), j,
link_layer_to_str(port_attr.link_layer));
}
}
#endif
@ -360,28 +434,20 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
continue;
}
/* If we got this far, we want the port. Make an item for
it and add it to the list. */
/* If we got this far, we want the port. Make an item for it. */
pi = OBJ_NEW(ompi_common_verbs_port_item_t);
if (NULL == pi) {
goto err_destroy_cq_pd;
goto err_free_port_list;
}
pi->device = di;
pi->port_num = j;
pi->port_attr = port_attr;
OBJ_RETAIN(di);
/* Add the port item to the list */
opal_list_append(port_list, &pi->super);
}
/* If we allocated a pd for testing RC/UD, free it here */
if (NULL != pd) {
ibv_dealloc_pd(pd);
pd = NULL;
}
if (NULL != cq) {
ibv_destroy_cq(cq);
cq = NULL;
opal_output_verbose(5, stream, "found acceptable verbs interface %s:%d",
ibv_get_device_name(device), j);
}
/* We're done with the device; if some ports are using it, its
@ -390,19 +456,30 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
OBJ_RELEASE(di);
}
/* Sanity check that the devices specified in the if_include /
if_exclude lists actually existed. If this is true, then the
sanity list will now be empty. If there are still items left
on the list, then they didn't exist. Bad. Print a warning (if
the warning is not disabled). */
if (0 != opal_argv_count(if_sanity_list)) {
if (ompi_common_verbs_warn_nonexistent_if) {
char *str = opal_argv_join(if_sanity_list, ',');
orte_show_help("help-ompi-common-verbs.txt", "nonexistent port",
true, orte_process_info.nodename,
((NULL != if_include) ? "in" : "ex"), str);
free(str);
/* Only warn once per process */
ompi_common_verbs_warn_nonexistent_if = false;
}
}
if (NULL != if_sanity_list) {
opal_argv_free(if_sanity_list);
}
/* All done! */
return port_list;
err_destroy_cq_pd:
if (NULL != pd) {
ibv_dealloc_pd(pd);
pd = NULL;
}
if (NULL != cq) {
ibv_destroy_cq(cq);
cq = NULL;
}
err_free_port_list:
for (item = opal_list_remove_first(port_list);
item != NULL;
@ -411,6 +488,10 @@ opal_list_t *ompi_common_verbs_find_ibv_ports(const char *if_include,
}
err_free_argv:
if (NULL != if_sanity_list) {
opal_argv_free(if_sanity_list);
if_sanity_list = NULL;
}
opal_argv_free(if_include_list);
if_include_list = NULL;
opal_argv_free(if_exclude_list);

56
ompi/mca/common/verbs/common_verbs_mca.c Обычный файл
Просмотреть файл

@ -0,0 +1,56 @@
/*
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/base/mca_base_param.h"
#include "common_verbs.h"
/***********************************************************************/
/* Set to true once register_internal() has run */
static bool registered = false;
/* Value returned when registering "warn_nonexistent_if"; -1 until then */
static int warn_nonexistent_if_index = -1;
/* Whether to warn about nonexistent devices/ports that were named in
   the include/exclude lists; defaults to true */
bool ompi_common_verbs_warn_nonexistent_if = true;

/*
 * Register the common verbs MCA parameter "warn_nonexistent_if" (under
 * the "ompi_common_verbs" name), save the value returned by the
 * registration call for later synonym registration / lookup, and load
 * the parameter's value into ompi_common_verbs_warn_nonexistent_if.
 */
static void register_internal(void)
{
    /* Seed with the current default so that we never read an
       indeterminate value below if the registration call does not
       store anything (e.g., on failure). */
    int ival = (int) ompi_common_verbs_warn_nonexistent_if;

    warn_nonexistent_if_index =
        mca_base_param_reg_int_name("ompi_common_verbs",
                                    "warn_nonexistent_if",
                                    "Warn if non-existent devices and/or ports are specified in device include/exclude MCA parameters "
                                    "(0 = do not warn; any other value = warn)",
                                    false, false,
                                    (int) ompi_common_verbs_warn_nonexistent_if,
                                    &ival);
    ompi_common_verbs_warn_nonexistent_if = (bool) ival;

    registered = true;
}
/*
 * Register the common verbs MCA parameters (first call only), then
 * register synonyms for them on behalf of the given upper-level
 * component and refresh the cached parameter values.
 *
 * @param component (IN): component that the synonym is registered for
 */
void ompi_common_verbs_mca_register(mca_base_component_t *component)
{
    int ival;

    if (!registered) {
        register_internal();
    }

    /* The index starts out as -1; if it is still negative, there is no
       registered parameter to make a synonym for or to look up. */
    if (warn_nonexistent_if_index < 0) {
        return;
    }

    /* Make synonyms for the common_verbs MCA params.  Need to look up
       the value again, because a new/different value may have been
       set by the new synonym name. */
    mca_base_param_reg_syn(warn_nonexistent_if_index, component,
                           "warn_nonexistent_if", false);

    /* Seed with the current value so that it is simply retained if
       the lookup does not store anything (e.g., on failure). */
    ival = (int) ompi_common_verbs_warn_nonexistent_if;
    mca_base_param_lookup_int(warn_nonexistent_if_index, &ival);
    ompi_common_verbs_warn_nonexistent_if = (bool) ival;
}

Просмотреть файл

@ -0,0 +1,99 @@
/*
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include <infiniband/verbs.h>
#include "ompi/constants.h"
#include "common_verbs.h"
/*
* It seems you can't probe a device / port to see if it supports a
* specific type of QP. You just have to try to make it and see if it
* works. This is a short helper function to try to make a QP of a
* specific type and return whether it worked.
*/
static bool make_qp(struct ibv_pd *pd, struct ibv_cq *cq, enum ibv_qp_type type)
{
struct ibv_qp_init_attr qpia;
struct ibv_qp *qp;
memset(&qpia, 0, sizeof(qpia));
qpia.qp_context = NULL;
qpia.send_cq = cq;
qpia.recv_cq = cq;
qpia.srq = NULL;
qpia.cap.max_send_wr = 1;
qpia.cap.max_recv_wr = 1;
qpia.cap.max_send_sge = 1;
qpia.cap.max_recv_sge = 1;
qpia.cap.max_inline_data = 0;
qpia.qp_type = type;
qpia.sq_sig_all = 0;
qp = ibv_create_qp(pd, &qpia);
if (NULL != qp) {
ibv_destroy_qp(qp);
return true;
}
return false;
}
/*
 * Test whether QPs of the requested type(s) can be created on a
 * device.
 *
 * @param device_context (IN): open device context to test
 * @param flags (IN): OMPI_COMMON_VERBS_FLAGS_RC and/or
 *        OMPI_COMMON_VERBS_FLAGS_UD; other bits are not examined
 *
 * @return OMPI_ERR_BAD_PARAM if device_context is NULL or neither the
 *         RC nor the UD flag is set; OMPI_ERR_OUT_OF_RESOURCE if the
 *         temporary PD or CQ could not be created;
 *         OMPI_ERR_NOT_SUPPORTED if any requested QP type could not be
 *         created; OMPI_SUCCESS otherwise.
 */
int ompi_common_verbs_qp_test(struct ibv_context *device_context, int flags)
{
    const int qp_type_mask =
        OMPI_COMMON_VERBS_FLAGS_RC | OMPI_COMMON_VERBS_FLAGS_UD;
    struct ibv_pd *pd;
    struct ibv_cq *cq;
    int rc;

    /* Bozo check */
    if (NULL == device_context || 0 == (flags & qp_type_mask)) {
        return OMPI_ERR_BAD_PARAM;
    }

    /* A QP needs both a PD and a CQ; make the PD first */
    pd = ibv_alloc_pd(device_context);
    if (NULL == pd) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Make the CQ, then try each requested QP type in turn (RC before
       UD), stopping at the first failure */
    cq = ibv_create_cq(device_context, 1, NULL, NULL, 0);
    if (NULL == cq) {
        rc = OMPI_ERR_OUT_OF_RESOURCE;
    } else if ((flags & OMPI_COMMON_VERBS_FLAGS_RC) &&
               !make_qp(pd, cq, IBV_QPT_RC)) {
        rc = OMPI_ERR_NOT_SUPPORTED;
    } else if ((flags & OMPI_COMMON_VERBS_FLAGS_UD) &&
               !make_qp(pd, cq, IBV_QPT_UD)) {
        rc = OMPI_ERR_NOT_SUPPORTED;
    } else {
        rc = OMPI_SUCCESS;
    }

    /* Release the PD (always present here) and the CQ (if it was made) */
    ibv_dealloc_pd(pd);
    if (NULL != cq) {
        ibv_destroy_cq(cq);
    }

    return rc;
}

Просмотреть файл

@ -34,3 +34,15 @@ job may or may not continue.
Hostname: %s
Device name: %s
Error (%d): %s
#
[nonexistent port]
WARNING: One or more nonexistent OpenFabrics devices/ports were
specified:
Host: %s
MCA parameter: ompi_common_verbs_%sclude
Nonexistent entities: %s
These entities will be ignored. You can disable this warning by
setting the ompi_common_verbs_warn_nonexistent_if MCA parameter to 0.
#