1
1

Add support for btl_ofud_{in,ex}clude MCA parameters.

This commit was SVN r18916.
Этот коммит содержится в:
Andrew Friedley 2008-07-15 17:57:52 +00:00
родитель 07841808ee
Коммит dabe6defb3
2 изменённых файлов: 189 добавлений и 12 удалений

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -61,6 +61,12 @@ struct mca_btl_ud_component_t {
uint32_t max_btls; /**< Maximum number of BTL modules */
uint32_t num_btls; /**< Number of available/initialized BTL modules */
char* if_include;
char** if_include_list;
char* if_exclude;
char** if_exclude_list;
char** if_list;
struct mca_btl_ud_module_t* ud_btls; /**< array of available BTLs */
opal_list_t ud_procs; /**< list of ib proc structures */

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2008 The University of Tennessee and The University
@ -28,6 +28,7 @@
#include "orte/util/show_help.h"
#include "ompi/mca/btl/btl.h"
#include "opal/sys/timer.h"
#include "opal/util/argv.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/mca/btl/base/base.h"
@ -126,6 +127,12 @@ int mca_btl_ud_component_open(void)
"Maximum number of HCAs/ports to use",
4, (int*)&mca_btl_ofud_component.max_btls);
mca_btl_ud_param_reg_string("if_include", "Comma-delimited list of HCAs/ports to be used; empty value means to use all HCAs/ports found",
NULL, &mca_btl_ofud_component.if_include);
mca_btl_ud_param_reg_string("if_exclude", "Comma-delimited list of HCAs/ports to be excluded; empty value means to use all HCAs/ports found",
NULL, &mca_btl_ofud_component.if_exclude);
mca_btl_ud_param_reg_string("mpool", "Name of the memory pool to be used",
"rdma", &mca_btl_ofud_component.ud_mpool_name);
@ -231,6 +238,119 @@ static int mca_btl_ud_modex_send(void)
return rc;
}
/*
 * Work out which ports of one device may be used, according to the
 * component's if_include_list / if_exclude_list.
 *
 * ib_dev        - device whose name is matched against the lists
 * port_cnt      - number of ports on the device; ports are numbered
 *                 1..port_cnt
 * allowed_ports - output array; the caller must supply room for at least
 *                 port_cnt entries.  On return the first <return value>
 *                 entries (indices 0 .. n-1) hold the permitted port numbers.
 *
 * A list entry is either a bare device name (matches every port of that
 * device) or "<device>:<port>" (matches a single port).  The include list
 * is consulted first; the exclude list is only consulted when there is no
 * include list.  With neither list, every port is allowed.
 *
 * As a side effect, every entry of if_list that names this device, or one
 * of its ports, is removed from if_list.
 *
 * Returns the number of entries written to allowed_ports, or 0 if the
 * scratch buffer could not be allocated.
 */
static int
get_port_list(struct ibv_device* ib_dev, int port_cnt, int *allowed_ports)
{
    int i, j, k, num_ports = 0;
    const char *dev_name;
    char *name;
    size_t name_size;

    dev_name = ibv_get_device_name(ib_dev);

    /* Room for "<dev_name>:<port>" plus the terminator.  3 * sizeof(int)
       characters is enough for the decimal form of any int (sign
       included), so a three-digit port number can no longer overrun the
       buffer as it could with the former fixed "+ 4". */
    name_size = strlen(dev_name) + 1 + 3 * sizeof(int) + 1;
    name = (char*) malloc(name_size);
    if (NULL == name) {
        return 0;
    }

    /* Assume that all ports are allowed.  num_ports will be adjusted
       below to reflect whether this is true or not. */
    for (i = 1; i <= port_cnt; ++i) {
        allowed_ports[num_ports++] = i;
    }
    num_ports = 0;

    if (NULL != mca_btl_ofud_component.if_include_list) {
        /* If only the HCA name is given (eg. mthca0,mthca1) use all
           ports; allowed_ports already holds 1..port_cnt from above */
        i = 0;
        while (mca_btl_ofud_component.if_include_list[i]) {
            if (0 == strcmp(dev_name,
                            mca_btl_ofud_component.if_include_list[i])) {
                num_ports = port_cnt;
                goto done;
            }
            ++i;
        }

        /* Include only requested ports on the HCA */
        for (i = 1; i <= port_cnt; ++i) {
            snprintf(name, name_size, "%s:%d", dev_name, i);
            for (j = 0;
                 NULL != mca_btl_ofud_component.if_include_list[j]; ++j) {
                if (0 == strcmp(name,
                                mca_btl_ofud_component.if_include_list[j])) {
                    allowed_ports[num_ports++] = i;
                    break;
                }
            }
        }
    } else if (NULL != mca_btl_ofud_component.if_exclude_list) {
        /* If only the HCA name is given (eg. mthca0,mthca1) exclude
           all ports */
        i = 0;
        while (mca_btl_ofud_component.if_exclude_list[i]) {
            if (0 == strcmp(dev_name,
                            mca_btl_ofud_component.if_exclude_list[i])) {
                num_ports = 0;
                goto done;
            }
            ++i;
        }

        /* Exclude the specified ports on this HCA */
        for (i = 1; i <= port_cnt; ++i) {
            snprintf(name, name_size, "%s:%d", dev_name, i);
            for (j = 0;
                 NULL != mca_btl_ofud_component.if_exclude_list[j]; ++j) {
                if (0 == strcmp(name,
                                mca_btl_ofud_component.if_exclude_list[j])) {
                    /* If found, set a sentinel value */
                    j = -1;
                    break;
                }
            }

            /* If we didn't find it, it's ok to include in the list */
            if (-1 != j) {
                allowed_ports[num_ports++] = i;
            }
        }
    } else {
        num_ports = port_cnt;
    }

done:
    /* Remove the following from the error-checking if_list:
       - bare device name
       - device name suffixed with port number */
    if (NULL != mca_btl_ofud_component.if_list) {
        for (i = 0; NULL != mca_btl_ofud_component.if_list[i]; ++i) {
            /* Look for raw device name */
            if (0 == strcmp(mca_btl_ofud_component.if_list[i], dev_name)) {
                j = opal_argv_count(mca_btl_ofud_component.if_list);
                opal_argv_delete(&j, &(mca_btl_ofud_component.if_list),
                                 i, 1);
                /* Re-examine the entry that slid into slot i */
                --i;
            }
        }
        for (i = 1; i <= port_cnt; ++i) {
            snprintf(name, name_size, "%s:%d", dev_name, i);
            for (j = 0; NULL != mca_btl_ofud_component.if_list[j]; ++j) {
                if (0 == strcmp(mca_btl_ofud_component.if_list[j], name)) {
                    /* Only the first matching entry is removed per port */
                    k = opal_argv_count(mca_btl_ofud_component.if_list);
                    opal_argv_delete(&k, &(mca_btl_ofud_component.if_list),
                                     j, 1);
                    break;
                }
            }
        }
    }

    free(name);

    return num_ports;
}
/*
* UD component initialization:
* (1) read interface list from kernel and compare against component parameters
@ -247,12 +367,14 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
struct ibv_device* ib_dev;
int32_t num_devs;
mca_btl_base_module_t** btls;
uint32_t i, j;
uint32_t i, j, k;
uint32_t port_cnt;
opal_list_t btl_list;
mca_btl_ud_module_t* ud_btl;
mca_btl_base_selected_module_t* ib_selected;
opal_list_item_t* item;
unsigned short seedv[3];
int* allowed_ports = NULL;
char* btl_str;
char* tok;
@ -287,12 +409,38 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
seedv[2] = opal_sys_timer_get_cycles();
seed48(seedv);
/* Parse the include and exclude lists, checking for errors */
mca_btl_ofud_component.if_include_list =
mca_btl_ofud_component.if_exclude_list =
mca_btl_ofud_component.if_list = NULL;
if (NULL != mca_btl_ofud_component.if_include &&
NULL != mca_btl_ofud_component.if_exclude) {
orte_show_help("help-mpi-btl-openib.txt",
"specified include and exclude", true,
mca_btl_ofud_component.if_include,
mca_btl_ofud_component.if_exclude, NULL);
btls = NULL;
goto modex_send;
} else if (NULL != mca_btl_ofud_component.if_include) {
mca_btl_ofud_component.if_include_list =
opal_argv_split(mca_btl_ofud_component.if_include, ',');
mca_btl_ofud_component.if_list =
opal_argv_copy(mca_btl_ofud_component.if_include_list);
} else if (NULL != mca_btl_ofud_component.if_exclude) {
mca_btl_ofud_component.if_exclude_list =
opal_argv_split(mca_btl_ofud_component.if_exclude, ',');
mca_btl_ofud_component.if_list =
opal_argv_copy(mca_btl_ofud_component.if_exclude_list);
}
ib_devs = ibv_get_device_list(&num_devs);
if(0 == num_devs) {
mca_btl_base_error_no_nics("OpenFabrics UD", "HCA");
mca_btl_ud_modex_send();
return NULL;
btls = NULL;
goto free_include_list;
}
/** We must loop through all the hca id's, get their handles and
@ -313,23 +461,31 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
if(!ib_dev_context) {
BTL_ERROR(("error obtaining device context for %s: %s\n",
ibv_get_device_name(ib_dev), strerror(errno)));
return NULL;
btls = NULL;
goto free_dev_list;
}
if(ibv_query_device(ib_dev_context, &ib_dev_attr)){
BTL_ERROR(("error obtaining device attributes for %s: %s\n",
ibv_get_device_name(ib_dev), strerror(errno)));
return NULL;
btls = NULL;
goto free_dev_list;
}
allowed_ports = (int*)malloc(ib_dev_attr.phys_port_cnt * sizeof(int));
port_cnt = get_port_list(ib_dev,
ib_dev_attr.phys_port_cnt, allowed_ports);
/* Note ports are 1 based hence j = 1 */
for(j = 1; j <= ib_dev_attr.phys_port_cnt; j++) {
for(j = 1; j <= port_cnt; j++) {
struct ibv_port_attr ib_port_attr;
if(ibv_query_port(ib_dev_context, (uint8_t)j, &ib_port_attr)) {
k = allowed_ports[j];
if(ibv_query_port(ib_dev_context, (uint8_t)k, &ib_port_attr)) {
BTL_ERROR(("error getting port attributes for device %s port %d: %s",
ibv_get_device_name(ib_dev), j, strerror(errno)));
ibv_get_device_name(ib_dev), k, strerror(errno)));
return NULL;
}
@ -343,7 +499,7 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
ud_btl->ib_dev = ib_dev;
ud_btl->ib_dev_context = ib_dev_context;
ud_btl->ib_port_num = (uint8_t)j;
ud_btl->ib_port_num = (uint8_t)k;
ud_btl->addr.subnet = ib_port_attr.sm_lid;
ud_btl->addr.lid = ib_port_attr.lid;
@ -355,6 +511,8 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
}
}
free(allowed_ports);
/* Allocate space for btl modules */
mca_btl_ofud_component.ud_btls = (mca_btl_ud_module_t*)
@ -396,7 +554,6 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
}
OBJ_DESTRUCT(&btl_list);
mca_btl_ud_modex_send();
/* Since not all modules may have initialized successfully, realloc
to free space from failed modules */
@ -408,7 +565,21 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules,
*num_btl_modules = mca_btl_ofud_component.num_btls;
free_dev_list:
ibv_free_device_list(ib_devs);
free_include_list:
if (NULL != mca_btl_ofud_component.if_include_list) {
opal_argv_free(mca_btl_ofud_component.if_include_list);
mca_btl_ofud_component.if_include_list = NULL;
}
if (NULL != mca_btl_ofud_component.if_exclude_list) {
opal_argv_free(mca_btl_ofud_component.if_exclude_list);
mca_btl_ofud_component.if_exclude_list = NULL;
}
modex_send:
mca_btl_ud_modex_send();
return btls;
}