1
1
openmpi/ompi/mca/sbgp/ibnet/sbgp_ibnet_component.c
Gilles Gouaillardet b565e69b86 check-help-strings cleanup
This commit was SVN r32491.
2014-08-11 03:19:57 +00:00

600 строки
19 KiB
C

/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#include "ompi_config.h"
#include "infiniband/verbs.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "opal/util/argv.h"
#include "opal/include/opal/types.h"
#include "opal_stdint.h"
#include "sbgp_ibnet.h"
#include "sbgp_ibnet_mca.h"
#include "ompi/mca/common/ofacm/base.h"
#include "ompi/mca/common/ofacm/connect.h"
#include "ompi/mca/common/verbs/common_verbs.h"
/*
 * Public string showing the sbgp ibnet component version number.
 * OMPI_VERSION is appended through compile-time string literal
 * concatenation.
 */
const char *mca_sbgp_ibnet_component_version_string =
"Open MPI sbgp - ibnet collective MCA component version " OMPI_VERSION;
/*
 * Local functions
 *
 * These are forward-declared because the component descriptor below
 * stores their addresses before the definitions appear.
 */
/* Component open hook: resets component-level state */
static int mca_sbgp_ibnet_open(void);
/* Component close hook: tears down the component-level device list */
static int mca_sbgp_ibnet_close(void);
/* First-time availability query: parses the interface filters and
 * loads the devices/ports */
static int mca_sbgp_ibnet_init_query(bool enable_progress_threads,
bool enable_mpi_threads);
/*
 * Instantiate the public struct with all of our public information
 * and pointers to our public functions in it.
 *
 * The initializer is positional: every value below is matched to a field
 * of mca_sbgp_ibnet_component_t purely by its order, so it has to be kept
 * in sync with the struct declaration (not visible in this file).
 */
mca_sbgp_ibnet_component_t mca_sbgp_ibnet_component = {
/* First, fill in the super */
{
/* First, the mca_component_t struct containing meta
information about the component itself */
{
MCA_SBGP_BASE_VERSION_2_0_0,
/* Component name and version */
"ibnet",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
/* Component open and close functions */
mca_sbgp_ibnet_open, /* mca_open_component */
mca_sbgp_ibnet_close, /* mca_close_component */
NULL, /* mca_query_component */
mca_sbgp_ibnet_register_params, /* mca_register_component_params */
},
mca_sbgp_ibnet_init_query, /* sbgp_init_query */
mca_sbgp_ibnet_select_procs, /* select_procs */
0, /* (default) priority */
},
/* verbose mode */
false,
/* Maximum allowed number of subgroups */
0,
/* Enable/disable default subnet id warning.
   NOTE(review): two flags are initialized here but only one is
   described; confirm the second one against the struct declaration. */
false,
false,
/* IB MTU requested by user */
0,
/* IB partition definition */
0,
/* Keeping hca data.
   NOTE(review): presumably these four pointers are the raw
   if_include/if_exclude strings and their parsed lists -- confirm
   against the struct declaration. */
NULL,
NULL,
NULL,
NULL,
/** Dummy argv-style list; a copy of names from the
if_[in|ex]clude list that we use for error checking (to ensure
that they all exist) */
NULL,
};
/*
 * No-op stand-in for the init-query entry point.
 *
 * mca_sbgp_ibnet_init_query() installs this function in the component
 * descriptor once it has completed successfully.  Both thread flags are
 * ignored and OMPI_SUCCESS is always returned.
 */
static int mca_sbgp_ibnet_dummy_init_query(
    bool enable_progress_threads, bool enable_mpi_threads)
{
    (void) enable_progress_threads;
    (void) enable_mpi_threads;

    return OMPI_SUCCESS;
}
/*
* Open the component
*/
static int mca_sbgp_ibnet_open(void)
{
/* local variables */
mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component;
mca_sbgp_ibnet_component.pkey_val &= SBGP_IBNET_IB_PKEY_MASK;
cs->total_active_ports = 0;
cs->curr_max_group_id = 100;
OBJ_CONSTRUCT(&cs->devices, opal_list_t);
return OMPI_SUCCESS;
}
/*
 * Close the component.
 *
 * Runs the destructor of the component-level devices list and returns
 * OMPI_SUCCESS.
 */
static int mca_sbgp_ibnet_close(void)
{
    OBJ_DESTRUCT(&mca_sbgp_ibnet_component.devices);

    return OMPI_SUCCESS;
}
/*
 * Constructor for mca_sbgp_ibnet_device_t: puts every field this file
 * initializes into a well-defined "empty" state (no verbs device, index
 * -1, zeroed device attributes, no ports and no CPCs).
 */
static void mca_sbgp_ibnet_device_constructor
    (mca_sbgp_ibnet_device_t *device)
{
    /* Verbs device identity and attributes */
    device->ib_dev = NULL;
    device->device_index = -1;
    memset(&device->ib_dev_attr, 0, sizeof(struct ibv_device_attr));

    /* Port bookkeeping */
    device->ports = NULL;
    device->num_act_ports = 0;

    /* OFACM connection pseudo-components (CPCs) */
    device->cpcs = NULL;
    device->num_cpcs = 0;
}
/*
 * Destructor for mca_sbgp_ibnet_device_t: releases the ports array.
 */
static void mca_sbgp_ibnet_device_destructor
    (mca_sbgp_ibnet_device_t *device)
{
    /* free(NULL) is a no-op, so a device without ports needs no guard */
    free(device->ports);
}
/*
 * Class instance for mca_sbgp_ibnet_device_t: declares opal_list_item_t
 * as the parent class and binds the constructor/destructor defined above.
 */
OBJ_CLASS_INSTANCE(mca_sbgp_ibnet_device_t,
opal_list_item_t,
mca_sbgp_ibnet_device_constructor,
mca_sbgp_ibnet_device_destructor);
/*
 * Build the list of port numbers on `device` that we are allowed to use.
 *
 * The decision is driven by the component's if_include_list and
 * if_exclude_list (mca_sbgp_ibnet_init_query() sets at most one of them):
 *   - include list: a bare device name allows every port of the device,
 *     "<device>:<port>" entries allow only the named ports;
 *   - exclude list: a bare device name rejects the whole device,
 *     "<device>:<port>" entries reject only the named ports;
 *   - neither list: every physical port is allowed.
 *
 * As a side effect, every entry of the error-checking if_list that names
 * this device (bare, or suffixed with one of its port numbers) is removed
 * from that list.
 *
 * @param device         device whose ib_dev and ib_dev_attr are already set
 * @param allowed_ports  output array with room for at least
 *                       device->ib_dev_attr.phys_port_cnt entries; receives
 *                       the allowed 1-based port numbers
 * @return number of entries written to allowed_ports; 0 when no port is
 *         allowed, the device name is unavailable, or the scratch buffer
 *         cannot be allocated
 */
static int
get_port_list(mca_sbgp_ibnet_device_t *device, int *allowed_ports)
{
    char *name;
    const char *dev_name;
    size_t name_size;
    int i, j, k, num_ports = 0;

    dev_name = ibv_get_device_name(device->ib_dev);
    if (NULL == dev_name) {
        return 0;
    }

    /* Scratch buffer for "<dev_name>:<port>".  16 extra bytes cover the
       colon, any decimal int and the terminating NUL (the previous "+ 4"
       overflowed for port numbers >= 100). */
    name_size = strlen(dev_name) + 16;
    name = (char*) malloc(name_size);
    if (NULL == name) {
        return 0;
    }

    if (NULL != mca_sbgp_ibnet_component.if_include_list) {
        /* If only the device name is given (eg. mtdevice0,mtdevice1) use all
           ports */
        for (i = 0; NULL != mca_sbgp_ibnet_component.if_include_list[i]; ++i) {
            if (0 == strcmp(dev_name,
                            mca_sbgp_ibnet_component.if_include_list[i])) {
                IBNET_VERBOSE(10, ("if_include_list - %s.\n", mca_sbgp_ibnet_component.if_include_list[i]));
                /* The caller reads the port numbers from allowed_ports, so
                   the array has to be filled in, not just counted */
                for (j = 1; j <= device->ib_dev_attr.phys_port_cnt; ++j) {
                    allowed_ports[num_ports++] = j;
                }
                goto done;
            }
        }

        /* Include only requested ports on the device */
        for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) {
            snprintf(name, name_size, "%s:%d", dev_name, i);
            for (j = 0;
                 NULL != mca_sbgp_ibnet_component.if_include_list[j]; ++j) {
                if (0 == strcmp(name,
                                mca_sbgp_ibnet_component.if_include_list[j])) {
                    IBNET_VERBOSE(10, ("Allowed port %d: idx %d; if_include_list - %s\n",
                                       i, num_ports, mca_sbgp_ibnet_component.if_include_list[j]));
                    allowed_ports[num_ports++] = i;
                    break;
                }
            }
        }
    } else if (NULL != mca_sbgp_ibnet_component.if_exclude_list) {
        /* If only the device name is given (eg. mtdevice0,mtdevice1) exclude
           all ports */
        for (i = 0; NULL != mca_sbgp_ibnet_component.if_exclude_list[i]; ++i) {
            if (0 == strcmp(dev_name,
                            mca_sbgp_ibnet_component.if_exclude_list[i])) {
                num_ports = 0;
                goto done;
            }
        }

        /* Exclude the specified ports on this device */
        for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) {
            snprintf(name, name_size, "%s:%d", dev_name, i);
            for (j = 0;
                 NULL != mca_sbgp_ibnet_component.if_exclude_list[j]; ++j) {
                if (0 == strcmp(name,
                                mca_sbgp_ibnet_component.if_exclude_list[j])) {
                    /* If found, set a sentinel value */
                    j = -1;
                    break;
                }
            }
            /* If we didn't find it, it's ok to include in the list */
            if (-1 != j) {
                allowed_ports[num_ports++] = i;
            }
        }
    } else {
        /* No filter was given: all ports are allowed */
        for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) {
            allowed_ports[num_ports++] = i;
        }
    }

done:
    /* Remove the following from the error-checking if_list:
       - bare device name
       - device name suffixed with port number */
    if (NULL != mca_sbgp_ibnet_component.if_list) {
        for (i = 0; NULL != mca_sbgp_ibnet_component.if_list[i]; ++i) {
            /* Look for raw device name */
            if (0 == strcmp(mca_sbgp_ibnet_component.if_list[i], dev_name)) {
                j = opal_argv_count(mca_sbgp_ibnet_component.if_list);
                opal_argv_delete(&j, &(mca_sbgp_ibnet_component.if_list),
                                 i, 1);
                /* The following entries moved down by one; revisit slot i */
                --i;
            }
        }
        for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) {
            snprintf(name, name_size, "%s:%d", dev_name, i);
            for (j = 0; NULL != mca_sbgp_ibnet_component.if_list[j]; ++j) {
                if (0 == strcmp(mca_sbgp_ibnet_component.if_list[j], name)) {
                    k = opal_argv_count(mca_sbgp_ibnet_component.if_list);
                    opal_argv_delete(&k, &(mca_sbgp_ibnet_component.if_list),
                                     j, 1);
                    break;
                }
            }
        }
    }

    free(name);
    return num_ports;
}
/*
 * Fill in the LID/LMC/state/MTU and subnet id of one port entry.
 *
 * The entry's `id` field must already hold the port number: it is used
 * for the GID query and in the log messages but is not set here.
 *
 * @param device          device that owns the ports array
 * @param port_index      index into device->ports of the entry to fill
 * @param ib_port_attr    attributes previously queried for that port
 * @param ib_dev_context  open verbs context of the device
 * @return OMPI_SUCCESS, or OMPI_ERR_NOT_FOUND if ibv_query_gid() fails
 */
static int ibnet_init_port(struct mca_sbgp_ibnet_device_t *device,
                           int port_index, struct ibv_port_attr *ib_port_attr,
                           struct ibv_context *ib_dev_context)
{
    union ibv_gid gid;
    struct mca_sbgp_ibnet_port_t *p = &device->ports[port_index];

    /* Set port data; lmc is stored as 1 << LMC (i.e. 2^LMC), not the raw
       LMC value */
    p->lmc = (1 << ib_port_attr->lmc);
    p->lid = ib_port_attr->lid;
    p->stat = ib_port_attr->state;
    p->mtu = ib_port_attr->active_mtu;

    IBNET_VERBOSE(10, ("Setting port data (%s:%d) lid=%d, lmc=%d, stat=%d, mtu=%d\n",
                       ibv_get_device_name(device->ib_dev), p->id, p->lid,
                       p->lmc, p->stat, p->mtu));

    /* GID index 0 carries the subnet prefix we are after */
    if (0 != ibv_query_gid(ib_dev_context, p->id, 0, &gid)) {
        IBNET_ERROR(("ibv_query_gid failed (%s:%d)\n",
                     ibv_get_device_name(device->ib_dev), p->id));
        return OMPI_ERR_NOT_FOUND;
    }

    /* set subnet data (converted from network to host byte order) */
    p->subnet_id = ntoh64(gid.global.subnet_prefix);

    /* The conversion specifiers now match the arguments: the old format
       consumed a 64-bit value with %d and emitted PRIx64 as literal text
       after the newline */
    IBNET_VERBOSE(10, ("my IB-only subnet_id for HCA %s port %d is %" PRIx64 "\n",
                       ibv_get_device_name(device->ib_dev), p->id,
                       (uint64_t) p->subnet_id));

    return OMPI_SUCCESS;
}
/*
 * Probe one verbs device and collect its allowed, active ports.
 *
 * The device is opened only long enough to query its attributes, ports
 * and (when a pkey is configured) partition keys, and to select the
 * OFACM CPCs; the verbs context is closed again before returning.
 *
 * @param ib_dev        verbs device to probe
 * @param device_index  index of ib_dev in the device list, stored in the
 *                      returned object
 * @return a new mca_sbgp_ibnet_device_t (the caller owns the reference),
 *         or NULL when the device is skipped, has no allowed active port,
 *         or any step fails.  On the NULL path every resource acquired
 *         here (port list, verbs context, device object) is released.
 *
 * NOTE(review): in pkey mode the port's `used` flag is cleared and never
 * set again, even when a matching pkey is found, and the port is still
 * counted as active -- confirm this is intended.
 */
static mca_sbgp_ibnet_device_t* ibnet_load_ports(struct ibv_device *ib_dev, int device_index)
{
    struct ibv_context *ib_dev_context = NULL;
    mca_sbgp_ibnet_device_t *device = NULL;
    int *allowed_ports = NULL;
    int rc, port_cnt, port, i, ret, p = 0;

#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
    if (IBV_TRANSPORT_IB != ib_dev->transport_type) {
        IBNET_VERBOSE(10, ("Skipping non IB device %s",
                           ibv_get_device_name(ib_dev)));
        /* Nothing has been acquired yet; the error path handles that */
        goto error;
    }
#endif

    device = OBJ_NEW(mca_sbgp_ibnet_device_t);
    if (NULL == device) {
        goto error;
    }
    device->ib_dev = ib_dev;
    device->device_index = device_index;

    ib_dev_context = ibv_open_device(ib_dev);
    if (NULL == ib_dev_context) {
        IBNET_ERROR(("Error obtaining device context for %s errno says %s",
                     ibv_get_device_name(device->ib_dev), strerror(errno)));
        goto error;
    }

    if (ibv_query_device(ib_dev_context, &device->ib_dev_attr)) {
        IBNET_ERROR(("error obtaining device attributes for %s errno says %s",
                     ibv_get_device_name(ib_dev), strerror(errno)));
        goto error;
    }

    allowed_ports = (int *) calloc(device->ib_dev_attr.phys_port_cnt, sizeof(int));
    if (NULL == allowed_ports) {
        goto error;
    }

    port_cnt = get_port_list(device, allowed_ports);
    if (0 == port_cnt) {
        goto error;
    }

#if OPAL_ENABLE_DEBUG
    for (i = 0; i < port_cnt; ++i) {
        IBNET_VERBOSE(10, ("allowed port %d with idx %d.\n", allowed_ports[i], i));
    }
#endif

    device->num_allowed_ports = port_cnt;
    device->ports = (mca_sbgp_ibnet_port_t *) calloc(port_cnt, sizeof(mca_sbgp_ibnet_port_t));
    if (NULL == device->ports) {
        goto error;
    }

    /* `port` walks the allowed list, `i` is the 1-based port number and
       `p` is the next free slot in device->ports */
    for (port = 0; port < port_cnt; port++) {
        struct ibv_port_attr ib_port_attr;

        i = allowed_ports[port];
        if (ibv_query_port(ib_dev_context, i, &ib_port_attr)) {
            IBNET_ERROR(("Error getting port attributes for device %s "
                         "port number %d errno says %s",
                         ibv_get_device_name(device->ib_dev), i, strerror(errno)));
            continue;
        }

        if (IBV_PORT_ACTIVE != ib_port_attr.state) {
            continue;
        }

        /* Pasha: Need to think how we want to handle MTUs
           if (ib_port_attr.active_mtu < mca_bcol_iboffload_component.mtu){
               device->mtu = ib_port_attr.active_mtu;
           }
        */

        /* start to put port info */
        device->ports[p].id = i;
        device->ports[p].stat = ib_port_attr.state;
        device->ports[p].mtu = ib_port_attr.active_mtu;
        device->ports[p].used = true;

        if (0 == mca_sbgp_ibnet_component.pkey_val) {
            ret = ibnet_init_port(device, p, &ib_port_attr, ib_dev_context);
            if (OMPI_SUCCESS != ret) {
                IBNET_ERROR(("Device %s "
                             "port number %d , failed to init port, errno says %s",
                             ibv_get_device_name(device->ib_dev),
                             i, strerror(errno)));
                /* Slot p is not consumed; the next active port reuses it */
                continue;
            }
        } else {
            uint16_t pkey,j;

            device->ports[p].used = false;
            for (j = 0; j < device->ib_dev_attr.max_pkeys; j++) {
                if (ibv_query_pkey(ib_dev_context, i, j, &pkey)) {
                    IBNET_ERROR(("error getting pkey for index %d, device %s "
                                 "port number %d errno says %s",
                                 j, ibv_get_device_name(device->ib_dev), i, strerror(errno)));
                    continue;
                }
                pkey = ntohs(pkey) & MCA_SBGP_IBNET_PKEY_MASK;
                if (pkey == (uint32_t) mca_sbgp_ibnet_component.pkey_val) {
                    ret = ibnet_init_port(device, p, &ib_port_attr, ib_dev_context);
                    if (OMPI_SUCCESS != ret) {
                        IBNET_ERROR(("Device %s "
                                     "port number %d , failed to init port, errno says %s",
                                     ibv_get_device_name(device->ib_dev),
                                     i, strerror(errno)));
                        continue;
                    }
                }
            }
        }

        p++; /* One port was loaded, go to the next one */
    }

    device->num_act_ports = p;
    if (0 == device->num_act_ports) {
        /* Nothing usable on this device */
        goto error;
    }

    {
        ompi_common_ofacm_base_dev_desc_t dev;

        /* Init dev */
        dev.ib_dev = ib_dev;
        dev.ib_dev_context = ib_dev_context;
        dev.capabilities = 0;
        rc = ompi_common_ofacm_base_select_for_local_port(
                &dev, &device->cpcs, (int *)&device->num_cpcs);
        /* Any failure to select a CPC makes this device unusable for us */
        if (OMPI_SUCCESS != rc) {
            IBNET_VERBOSE(10, ("Device %s, no CPC found",
                               ibv_get_device_name(device->ib_dev)));
            goto error;
        }
    }

    /* we do not continue to use the device we just collect data,
     * so close it for now. We will open it later in iboffload coll*/
    if (ibv_close_device(ib_dev_context)) {
        IBNET_ERROR(("Device %s, failed to close the device %s",
                     ibv_get_device_name(device->ib_dev), strerror(errno)));
    }
    ib_dev_context = NULL;

    /* Count the ports only once the device is certain to be returned, so
       that discarded devices do not inflate the component-wide total */
    mca_sbgp_ibnet_component.total_active_ports += p;

    free(allowed_ports);
    return device;

error:
    free(allowed_ports);
    if (NULL != ib_dev_context) {
        /* The context was opened but not handed to anyone: close it */
        if (ibv_close_device(ib_dev_context)) {
            IBNET_ERROR(("Device %s, failed to close the device %s",
                         ibv_get_device_name(ib_dev), strerror(errno)));
        }
    }
    if (NULL != device) {
        /* The object came from OBJ_NEW, so it must be released (which runs
           the destructor and frees it), not merely destructed */
        OBJ_RELEASE(device);
    }
    return NULL;
}
/*
 * Create the list of IB HCAs that have at least one active port.
 *
 * Every device reported by ompi_ibv_get_device_list() is probed with
 * ibnet_load_ports(); the ones that yield a device object are appended to
 * the component's devices list.  The verbs device array is released on
 * every return path.
 *
 * @return OMPI_SUCCESS if at least one device was appended, OMPI_ERROR if
 *         no device exists or none of them is usable
 *
 * NOTE(review): the appended objects keep the struct ibv_device pointers
 * taken from the array that is freed here -- confirm against the
 * libibverbs rules on using unopened devices after freeing the list.
 */
static int ibnet_load_devices(void)
{
    int num_devs = 0, i, rc = OMPI_SUCCESS;
    struct ibv_device **ib_devs = NULL;
    mca_sbgp_ibnet_device_t *device = NULL;
    mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component;

    IBNET_VERBOSE(7, ("Entering to ibnet_load_devices"));

    /* Get list of devices */
    ib_devs = ompi_ibv_get_device_list(&num_devs);
    if (0 == num_devs || NULL == ib_devs) {
        IBNET_VERBOSE(10, ("No ib devices found"));
        /* No hca error*/
        opal_show_help("help-mpi-btl-base.txt", "btl:no-nics", true);
        rc = OMPI_ERROR;
        goto out;
    }

    for (i = 0; i < num_devs; i++) {
        device = ibnet_load_ports(ib_devs[i], i);
        if (NULL != device) {
            IBNET_VERBOSE(10, ("Device %s was appended to device list with index %d.\n",
                               ibv_get_device_name(device->ib_dev), i));
            opal_list_append(&cs->devices,
                             (opal_list_item_t *) device);
        }
    }

    if (opal_list_is_empty(&cs->devices)) {
        /* No relevant devices were found, return error */
        IBNET_ERROR(("No active devices found"));
        rc = OMPI_ERROR;
    }

out:
    /* A non-NULL array (even an empty one) must always be given back */
    if (NULL != ib_devs) {
        ompi_ibv_free_device_list(ib_devs);
    }
    return rc;
}
/* query to see if the component is available for use, and can
* satisfy the thread and progress requirements
*/
int mca_sbgp_ibnet_init_query(bool enable_progress_threads,
bool enable_mpi_threads)
{
int rc, list_count = 0;
/* Parse the include and exclude lists, checking for errors */
mca_sbgp_ibnet_component.if_list = NULL;
mca_sbgp_ibnet_component.if_include_list = NULL;
mca_sbgp_ibnet_component.if_exclude_list = NULL;
IBNET_VERBOSE(7, ("Calling mca_sbgp_ibnet_init_query"));
if (NULL != mca_sbgp_ibnet_component.if_include) {
list_count++;
}
if (NULL != mca_sbgp_ibnet_component.if_exclude) {
list_count++;
}
if (list_count > 1) {
IBNET_ERROR(("Bad --mca (if_include, if_exclude) parameters !"));
return OMPI_ERROR;
} else if (NULL != mca_sbgp_ibnet_component.if_include) {
mca_sbgp_ibnet_component.if_include_list =
opal_argv_split(mca_sbgp_ibnet_component.if_include, ',');
mca_sbgp_ibnet_component.if_list =
opal_argv_copy(mca_sbgp_ibnet_component.if_include_list);
} else if (NULL != mca_sbgp_ibnet_component.if_exclude) {
mca_sbgp_ibnet_component.if_exclude_list =
opal_argv_split(mca_sbgp_ibnet_component.if_exclude, ',');
mca_sbgp_ibnet_component.if_list =
opal_argv_copy(mca_sbgp_ibnet_component.if_exclude_list);
}
/* Init CPC components */
rc = ompi_common_ofacm_base_init();
if (OMPI_SUCCESS != rc) {
return rc;
}
/* Load all devices and active ports */
rc = ibnet_load_devices();
if (OMPI_SUCCESS != rc) {
return rc;
}
mca_sbgp_ibnet_component.super.sbgp_init_query =
mca_sbgp_ibnet_dummy_init_query;
return OMPI_SUCCESS;
}