1
1

The new cpc selection framework is now in place. The patch below allows

for dynamic selection of cpc methods based on what is available.  It
also allows for inclusion/exclusion of methods.  It even further allows
for modifying the priorities of certain cpc methods to better determine
the optimal cpc method.

This patch also contains XRC compile time disablement (per Jeff's
patch).

At a high level, cpc selection works by walking through each cpc
and allowing it to test to see if it is permissible to run on this
mpirun.  It returns a priority if it is permissible or a -1 if not.  All
of the cpc names and priorities are rolled into a string.  This string
is then encapsulated in a message and passed around all the ompi
processes.  Once received and unpacked, the list received is compared
to a local copy of the list.  The connection method is chosen by
comparing the lists passed around to all nodes via modex with the list
generated locally.  Any non-negative number is a potentially valid
connection method.  The method below of determining the optimal
connection method is to take the cross-section of the two lists.  The
highest single value (and the other side being non-negative) is selected
as the cpc method.

svn merge -r 16948:17128 https://svn.open-mpi.org/svn/ompi/tmp-public/openib-cpc/ .

This commit was SVN r17138.
Этот коммит содержится в:
Jon Mason 2008-01-14 23:22:03 +00:00
родитель 6e50fca2dd
Коммит a0d4122606
16 изменённых файлов: 423 добавлений и 162 удалений

Просмотреть файл

@ -102,7 +102,8 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
AS_IF([test "$ompi_check_openib_happy" = "yes"],
[AC_CHECK_DECLS([IBV_EVENT_CLIENT_REREGISTER], [], [],
[#include <infiniband/verbs.h>])
AC_CHECK_FUNCS([ibv_get_device_list ibv_resize_cq ibv_open_xrc_domain])])
AC_CHECK_FUNCS([ibv_get_device_list ibv_resize_cq])
AC_CHECK_FUNCS([ibv_open_xrc_domain], [$1_have_xrc=1])])
CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS"
LDFLAGS="$ompi_check_openib_$1_save_LDFLAGS"

Просмотреть файл

@ -258,8 +258,6 @@ int mca_bml_r2_add_procs(
btl_inuse++;
if(NULL == bml_endpoint) {
/* allocate bml specific proc data */
bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
if (NULL == bml_endpoint) {
@ -281,8 +279,7 @@ int mca_bml_r2_add_procs(
}
bml_endpoints[p] =(mca_bml_base_endpoint_t*) proc->proc_bml;
/* dont allow an additional BTL with a lower exclusivity ranking */
size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
if(size > 0) {

Просмотреть файл

@ -53,14 +53,18 @@ sources = \
connect/btl_openib_connect_base.c \
connect/btl_openib_connect_oob.c \
connect/btl_openib_connect_oob.h \
connect/btl_openib_connect_xoob.c \
connect/btl_openib_connect_xoob.h \
connect/btl_openib_connect_rdma_cm.c \
connect/btl_openib_connect_rdma_cm.h \
connect/btl_openib_connect_ibcm.c \
connect/btl_openib_connect_ibcm.h \
connect/connect.h
if MCA_btl_openib_have_xrc
sources += \
connect/btl_openib_connect_xoob.c \
connect/btl_openib_connect_xoob.h
endif
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

Просмотреть файл

@ -219,7 +219,6 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
} else
#endif
{
openib_btl->qps[qp].u.srq_qp.srq =
ibv_create_srq(openib_btl->hca->ib_pd, &attr);
}
@ -313,6 +312,7 @@ int mca_btl_openib_add_procs(
for(i = 0; i < (int) nprocs; i++) {
struct ompi_proc_t* ompi_proc = ompi_procs[i];
mca_btl_openib_proc_t* ib_proc;
bool cpc_error = 0;
if(NULL == (ib_proc = mca_btl_openib_proc_create(ompi_proc))) {
return OMPI_ERR_OUT_OF_RESOURCE;
@ -322,6 +322,16 @@ int mca_btl_openib_add_procs(
/* check if the remote proc has a reachable subnet first */
BTL_VERBOSE(("got %d port_infos \n", ib_proc->proc_port_count));
for(j = 0; j < (int) ib_proc->proc_port_count; j++){
int rc;
/* Setup connect module */
rc = ompi_btl_openib_connect_base_select(ib_proc->proc_ports[j].cpclist,
openib_btl->port_info.cpclist);
if (rc != OMPI_SUCCESS) {
cpc_error = 1;
continue;
}
BTL_VERBOSE(("got a subnet %016x\n",
ib_proc->proc_ports[j].subnet_id));
if(ib_proc->proc_ports[j].subnet_id ==
@ -330,7 +340,12 @@ int mca_btl_openib_add_procs(
rem_subnet_id_port_cnt ++;
}
}
if (cpc_error) {
BTL_ERROR(("cpc_error error"));
return OMPI_ERROR;
}
if(!rem_subnet_id_port_cnt ) {
/* no use trying to communicate with this endpointlater */
BTL_VERBOSE(("No matching subnet id was found, moving on.. \n"));

Просмотреть файл

@ -46,6 +46,8 @@
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "connect/connect.h"
BEGIN_C_DECLS
#define HAVE_XRC (defined(HAVE_IBV_OPEN_XRC_DOMAIN) && (1 == OMPI_ENABLE_CONNECTX_XRC_SUPPORT))
@ -229,6 +231,7 @@ struct mca_btl_openib_port_info {
#if HAVE_XRC
uint16_t lid; /* used only in xrc */
#endif
char *cpclist;
};
typedef struct mca_btl_openib_port_info mca_btl_openib_port_info_t;

Просмотреть файл

@ -155,30 +155,70 @@ static int btl_openib_component_close(void)
*/
static int btl_openib_modex_send(void)
{
int rc, i;
size_t size;
mca_btl_openib_port_info_t *ports = NULL;
int rc, i;
char *message, *offset;
uint32_t size, size_save;
size_t msg_size;
size = mca_btl_openib_component.ib_num_btls * sizeof (mca_btl_openib_port_info_t);
if (size != 0) {
ports = (mca_btl_openib_port_info_t *)malloc (size);
if (NULL == ports) {
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* The message is packed into 2 parts:
* 1. a uint32_t indicating the number of ports in the message
* 2. for each port:
* a. the port data
* b. a uint32_t indicating a string length
* c. the string cpc list for that port, length specified by 2b.
*/
msg_size = sizeof(uint32_t) + mca_btl_openib_component.ib_num_btls * (sizeof(uint32_t) + sizeof(mca_btl_openib_port_info_t));
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
msg_size += strlen(mca_btl_openib_component.openib_btls[i]->port_info.cpclist);
}
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
mca_btl_openib_module_t *btl = mca_btl_openib_component.openib_btls[i];
ports[i] = btl->port_info;
if (0 == msg_size) {
return 0;
}
message = malloc(msg_size);
if (NULL == message) {
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Pack the number of ports */
size = mca_btl_openib_component.ib_num_btls;
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
MCA_BTL_OPENIB_PORT_INFO_HTON(ports[i]);
size = htonl(size);
#endif
}
}
rc = ompi_modex_send (&mca_btl_openib_component.super.btl_version, ports, size);
if (NULL != ports) {
free (ports);
memcpy(message, &size, sizeof(size));
offset = message + sizeof(size);
/* Pack each of the ports */
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
/* Pack the port struct */
memcpy(offset, &mca_btl_openib_component.openib_btls[i]->port_info, sizeof(mca_btl_openib_port_info_t));
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
MCA_BTL_OPENIB_PORT_INFO_HTON(*(mca_btl_openib_port_info_t *)offset);
#endif
offset += sizeof(mca_btl_openib_port_info_t);
/* Pack the strlen of the cpclist */
size = size_save =
strlen(mca_btl_openib_component.openib_btls[i]->port_info.cpclist);
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
size = htonl(size);
#endif
memcpy(offset, &size, sizeof(size));
offset += sizeof(size);
/* Pack the string */
memcpy(offset,
mca_btl_openib_component.openib_btls[i]->port_info.cpclist,
size_save);
offset += size_save;
}
rc = ompi_modex_send(&mca_btl_openib_component.super.btl_version,
message, msg_size);
free(message);
return rc;
}
@ -357,6 +397,8 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
lid < ib_port_attr->lid + lmc; lid++){
for(i = 0; i < mca_btl_openib_component.btls_per_lid; i++){
char param[40];
int rc;
openib_btl = malloc(sizeof(mca_btl_openib_module_t));
if(NULL == openib_btl) {
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
@ -383,6 +425,11 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
openib_btl->port_info.lid = lid;
}
#endif
rc = ompi_btl_openib_connect_base_query(&openib_btl->port_info.cpclist, hca);
if (OMPI_SUCCESS != rc) {
continue;
}
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbfunc = btl_openib_control;
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbdata = NULL;
@ -1295,10 +1342,6 @@ btl_openib_component_init(int *num_btl_modules,
return NULL;
}
/* Setup connect module */
if (OMPI_SUCCESS != ompi_btl_openib_connect_base_select()) {
return NULL;
}
btl_openib_modex_send();
*num_btl_modules = mca_btl_openib_component.ib_num_btls;

Просмотреть файл

@ -100,17 +100,19 @@ static mca_btl_openib_proc_t* mca_btl_openib_proc_lookup_ompi(ompi_proc_t* ompi_
mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
{
mca_btl_openib_proc_t* module_proc = NULL;
size_t size;
size_t msg_size;
uint32_t size;
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
size_t i;
#endif
int rc;
void *message;
char *offset;
/* Check if we have already created a IB proc
* structure for this ompi process */
module_proc = mca_btl_openib_proc_lookup_ompi(ompi_proc);
if(module_proc != NULL) {
if (NULL != module_proc) {
/* Gotcha! */
return module_proc;
}
@ -126,34 +128,59 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
* size) to represent the proc */
module_proc->proc_guid = ompi_proc->proc_name;
/* query for the peer address info */
rc = ompi_modex_recv(
&mca_btl_openib_component.super.btl_version,
ompi_proc,
(void*)&module_proc->proc_ports,
&size
);
if(OMPI_SUCCESS != rc) {
rc = ompi_modex_recv(&mca_btl_openib_component.super.btl_version,
ompi_proc,
&message,
&msg_size);
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("[%s:%d] ompi_modex_recv failed for peer %s",
__FILE__, __LINE__,
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
OBJ_RELEASE(module_proc);
return NULL;
}
if((size % sizeof(mca_btl_openib_port_info_t)) != 0) {
BTL_ERROR(("[%s:%d] invalid module address for peer %s",
__FILE__, __LINE__,
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
OBJ_RELEASE(module_proc);
if (0 == msg_size) {
return NULL;
}
module_proc->proc_port_count = size/sizeof(mca_btl_openib_port_info_t);
/* Message was packed in btl_openib_component.c; the format is
listed in a comment in that file */
/* Unpack the number of ports in the message */
offset = message;
memcpy(&(module_proc->proc_port_count), offset, sizeof(uint32_t));
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
module_proc->proc_port_count = ntohl(module_proc->proc_port_count);
#endif
module_proc->proc_ports = (mca_btl_openib_port_info_t *)malloc(sizeof(mca_btl_openib_port_info_t) * module_proc->proc_port_count);
offset += sizeof(uint32_t);
/* Loop over unpacking all the ports */
for (i = 0; i < module_proc->proc_port_count; i++) {
/* Unpack the port */
memcpy(&module_proc->proc_ports[i], offset,
sizeof(mca_btl_openib_port_info_t));
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
MCA_BTL_OPENIB_PORT_INFO_NTOH(module_proc->proc_ports[i]);
#endif
offset += sizeof(mca_btl_openib_port_info_t);
/* Unpack the string length */
memcpy(&size, offset, sizeof(size));
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
size = ntohl(size);
#endif
offset += sizeof(size);
/* Unpack the string */
module_proc->proc_ports[i].cpclist = malloc(size + 1);
if (NULL == module_proc->proc_ports[i].cpclist) {
/* JMS some error */
}
memcpy(module_proc->proc_ports[i].cpclist, offset, size);
module_proc->proc_ports[i].cpclist[size] = '\0';
offset += size;
}
if (0 == module_proc->proc_port_count) {
module_proc->proc_endpoints = NULL;
@ -161,13 +188,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
malloc(module_proc->proc_port_count * sizeof(mca_btl_base_endpoint_t*));
}
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
for(i=0; i < module_proc->proc_port_count; ++i) {
MCA_BTL_OPENIB_PORT_INFO_NTOH(module_proc->proc_ports[i]);
}
#endif
if(NULL == module_proc->proc_endpoints) {
if (NULL == module_proc->proc_endpoints) {
OBJ_RELEASE(module_proc);
return NULL;
}

Просмотреть файл

@ -18,6 +18,14 @@
# $HEADER$
#
# MCA_btl_openib_POST_CONFIG([should_build])
# ------------------------------------------
AC_DEFUN([MCA_btl_openib_POST_CONFIG], [
AS_IF([test $1 -eq 0 -a "$enable_dist" = "yes"],
[AC_MSG_ERROR([BTL openib is disabled but --enable-dist specifed. This will result in a bad tarball. Aborting configure.])])
AM_CONDITIONAL([MCA_btl_openib_have_xrc], [test $1 -eq 1 -a "x$btl_openib_have_xrc" = "x1" -a "x$ompi_want_connectx_xrc" = "x1"])
])
# MCA_btl_openib_CONFIG([action-if-can-compile],
# [action-if-cant-compile])

Просмотреть файл

@ -28,7 +28,8 @@ int ompi_btl_openib_connect_base_open(void);
/*
* Select function
*/
int ompi_btl_openib_connect_base_select(void);
int ompi_btl_openib_connect_base_select(char*, char*);
int ompi_btl_openib_connect_base_query(char**, mca_btl_openib_hca_t*);
END_C_DECLS

Просмотреть файл

@ -33,7 +33,9 @@ ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect = {
*/
static ompi_btl_openib_connect_base_funcs_t *all[] = {
&ompi_btl_openib_connect_oob,
#if HAVE_XRC
&ompi_btl_openib_connect_xoob,
#endif
&ompi_btl_openib_connect_rdma_cm,
&ompi_btl_openib_connect_ibcm,
NULL
@ -42,7 +44,8 @@ static ompi_btl_openib_connect_base_funcs_t *all[] = {
/*
* MCA parameter value
*/
static char *param = NULL;
static char *cpc_include = NULL;
static char *cpc_exclude = NULL;
/*
* Register MCA parameters
@ -50,48 +53,35 @@ static char *param = NULL;
int ompi_btl_openib_connect_base_open(void)
{
int i;
char **temp, *a, *b;
char **temp, *list, *string;
/* Make an MCA parameter to select which connect module to use */
temp = NULL;
for (i = 0; NULL != all[i]; ++i) {
opal_argv_append_nosize(&temp, all[i]->bcf_name);
}
a = opal_argv_join(temp, ',');
list = opal_argv_join(temp, ',');
opal_argv_free(temp);
asprintf(&b,
"Method used to make OpenFabrics connections (valid values: %s)",
a);
asprintf(&string,
"Method used to select OpenFabrics connections (valid values: %s)",
list);
/* For XRC qps we must to use XOOB connection manager */
if (mca_btl_openib_component.num_xrc_qps > 0) {
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"connect",
b, false, false,
"xoob", &param);
if (0 != strcmp("xoob", param)) {
opal_show_help("help-mpi-btl-openib.txt",
"XRC with wrong OOB", true,
orte_system_info.nodename,
mca_btl_openib_component.num_xrc_qps);
return OMPI_ERROR;
}
} else { /* For all others we should use OOB */
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"connect",
b, false, false,
"oob", &param);
if (0 != strcmp("oob", param)) {
opal_show_help("help-mpi-btl-openib.txt",
"SRQ or PP with wrong OOB", true,
orte_system_info.nodename,
mca_btl_openib_component.num_srq_qps,
mca_btl_openib_component.num_pp_qps);
return OMPI_ERROR;
}
}
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"cpc_include", string, false, false, NULL, &cpc_include);
free(string);
/* Call the open function on all the connect modules */
asprintf(&string,
"Method used to exclude OpenFabrics connections (valid values: %s)",
list);
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"cpc_exclude", string, false, false, NULL, &cpc_exclude);
free(list);
free(string);
/* Call the open function on all the connect modules so that they
* may setup any MCA params specific to the connection type
*/
for (i = 0; NULL != all[i]; ++i) {
if (NULL != all[i]->bcf_open) {
all[i]->bcf_open();
@ -101,33 +91,141 @@ int ompi_btl_openib_connect_base_open(void)
return OMPI_SUCCESS;
}
int ompi_btl_openib_connect_base_select(void)
/*
* The connection method is chosen by comparing the lists passed around
* to all nodes via modex with the list generated locally. Any
* non-negative number is a potentially valid connection method. The
* method below of determining the optimal connection method is to take
* the cross-section of the two lists. The highest single value (and
* the other side being non-negative) is selected as the cpc method.
*/
int ompi_btl_openib_connect_base_select(char *remotelist, char *locallist)
{
int i;
int i, j, max = -1;
char **localist_formatted, **remotelist_formatted;
char *name;
/* Go through all the pseudo-components; if the btl_openib_connect
param is empty, then take the first one that returns
OMPI_SUCCESS from its init function. If
btl_openib_connect_param is not empty, find that one and ensure
that its init function returns OMPI_SUCCESS. */
if (NULL != param && '\0' == param[0]) {
param = NULL;
}
for (i = 0; NULL != all[i]; ++i) {
if ((NULL != param && 0 == strcmp(all[i]->bcf_name, param)) ||
(NULL == param)) {
if (NULL != all[i]->bcf_init &&
OMPI_SUCCESS == all[i]->bcf_init()) {
ompi_btl_openib_connect = *(all[i]);
break;
BTL_VERBOSE(("remotelist = %s locallist = %s", remotelist, locallist));
localist_formatted = opal_argv_split(locallist, ',');
remotelist_formatted = opal_argv_split(remotelist, ',');
for (i = 0; NULL != localist_formatted[i] && NULL != localist_formatted[i+1]; i+=2) {
for (j = 0; NULL != remotelist_formatted[j] && NULL != remotelist_formatted[j+1]; j+=2) {
int local_val, remote_val;
local_val = atoi(localist_formatted[i+1]);
remote_val = atoi(remotelist_formatted[j+1]);
if (0 == strcmp(localist_formatted[i], remotelist_formatted[j]) &&
(-1 != local_val && -1 != remote_val)) {
if (local_val > max) {
max = local_val;
name = localist_formatted[i];
}
if (remote_val > max) {
max = remote_val;
name = remotelist_formatted[j];
}
}
}
}
if (NULL == all[i]) {
/* JMS opal_show_help */
return OMPI_ERR_NOT_FOUND;
if (-1 == max) {
BTL_ERROR(("Failed to find any working connections"));
return OMPI_ERROR;
}
for (i = 0; NULL != all[i]; i++) {
if (0 == strcmp(all[i]->bcf_name, name)) {
int rc;
rc = all[i]->bcf_init();
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("A problem was encountered with %s, ignoring this cpc", all[i]->bcf_name));
return OMPI_ERROR;
}
ompi_btl_openib_connect = *(all[i]);
break;
}
}
BTL_VERBOSE(("%s selected as transport", all[i]->bcf_name));
opal_argv_free(localist_formatted);
opal_argv_free(remotelist_formatted);
return OMPI_SUCCESS;
}
/*
 * Ask a single CPC whether it can run on the given HCA and record the
 * answer as a "<name>,<priority>" entry in an argv-style list.
 *
 * cpclist      list that receives the new entry
 * hca          device passed to the CPC's query function
 * cpc_counter  index of the CPC in the all[] table
 * valid        set to 1 when the CPC reports a priority greater than
 *              zero; left untouched otherwise
 *
 * Returns OMPI_SUCCESS (also when the CPC has no query function, in
 * which case nothing is appended), or OMPI_ERROR when the entry could
 * not be built or appended.
 */
static inline int cpc_specific_query(char ***cpclist, mca_btl_openib_hca_t *hca, int cpc_counter, bool *valid)
{
    char *entry = NULL;
    int priority;

    if (NULL == all[cpc_counter]->bcf_query) {
        return OMPI_SUCCESS;
    }

    priority = all[cpc_counter]->bcf_query(hca);
    if (priority > 0) {
        *valid = 1;
    }

    if (asprintf(&entry, "%s,%d", all[cpc_counter]->bcf_name, priority) < 0) {
        return OMPI_ERROR;
    }

    /* The argv list stores its own copy of the string, so the
     * temporary built by asprintf must be released here. */
    if (OMPI_SUCCESS != opal_argv_append_nosize(cpclist, entry)) {
        free(entry);
        return OMPI_ERROR;
    }
    free(entry);

    return OMPI_SUCCESS;
}
/* Return true when name appears in the NULL-terminated argv-style list. */
static bool cpc_name_in_list(char **list, const char *name)
{
    int i;

    if (NULL == list) {
        return false;
    }
    for (i = 0; NULL != list[i]; ++i) {
        if (0 == strcmp(list[i], name)) {
            return true;
        }
    }
    return false;
}

/*
 * Build the comma-separated "name,priority,name,priority,..." string
 * describing which CPCs may be used on the given HCA.
 *
 * If the cpc_include parameter is set, only the CPCs named in it are
 * queried.  Otherwise, if cpc_exclude is set, every CPC that is NOT
 * named in it is queried.  With neither set, all CPCs are queried.
 * Each CPC is queried at most once.
 *
 * cpclist  receives a newly allocated string on success; the caller
 *          owns it
 * hca      device handed to each CPC's query function
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR when a query fails, when no CPC
 * reported a usable priority, or when the result string could not be
 * built.
 */
int ompi_btl_openib_connect_base_query(char **cpclist, mca_btl_openib_hca_t *hca)
{
    int i, rc = OMPI_SUCCESS;
    bool valid = 0;
    char **cpclist_include, **cpclist_exclude, **namepriority_list = NULL;

    cpclist_include = opal_argv_split(cpc_include, ',');
    cpclist_exclude = opal_argv_split(cpc_exclude, ',');

    /* Go through all the CMs to create a list of usable CPCs */
    for (i = 0; NULL != all[i]; ++i) {
        if (NULL != cpclist_include) {
            /* An include list takes precedence over an exclude list */
            if (!cpc_name_in_list(cpclist_include, all[i]->bcf_name)) {
                continue;
            }
        } else if (NULL != cpclist_exclude) {
            /* Skip the CPC if ANY exclude entry names it */
            if (cpc_name_in_list(cpclist_exclude, all[i]->bcf_name)) {
                continue;
            }
        }

        rc = cpc_specific_query(&namepriority_list, hca, i, &valid);
        if (OMPI_ERROR == rc) {
            goto cleanup;
        }
    }

    if (0 == valid) {
        BTL_ERROR(("Failed to find any valid connections for %s, not "
                   "using it for this run",
                   ibv_get_device_name(hca->ib_dev)));
        rc = OMPI_ERROR;
        goto cleanup;
    }

    *cpclist = opal_argv_join(namepriority_list, ',');
    rc = (NULL == *cpclist) ? OMPI_ERROR : OMPI_SUCCESS;

cleanup:
    /* Release every temporary list on all exit paths */
    opal_argv_free(namepriority_list);
    opal_argv_free(cpclist_include);
    opal_argv_free(cpclist_exclude);

    return rc;
}

Просмотреть файл

@ -13,7 +13,7 @@
#include "btl_openib_endpoint.h"
#include "connect/connect.h"
static int ibcm_open(void);
static void ibcm_open(void);
static int ibcm_init(void);
static int ibcm_connect(mca_btl_base_endpoint_t *e);
static int ibcm_finalize(void);
@ -23,17 +23,16 @@ ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_ibcm = {
ibcm_open,
ibcm_init,
ibcm_connect,
NULL,
ibcm_finalize,
};
static int ibcm_open(void)
static void ibcm_open(void)
{
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"btl_openib_connect_ibcm_foo",
"connect_ibcm_foo",
"A dummy help message", false, false,
17, NULL);
return OMPI_SUCCESS;
}
static int ibcm_init(void)

Просмотреть файл

@ -39,8 +39,12 @@ typedef enum {
ENDPOINT_CONNECT_ACK
} connect_message_type_t;
static int oob_priority = 50;
static void oob_open(void);
static int oob_init(void);
static int oob_start_connect(mca_btl_base_endpoint_t *e);
static int oob_query(mca_btl_openib_hca_t *hca);
static int oob_finalize(void);
static int reply_start_connect(mca_btl_openib_endpoint_t *endpoint,
@ -67,16 +71,33 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
*/
ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_oob = {
"oob",
/* No need for "open */
NULL,
/* Open */
oob_open,
/* Init */
oob_init,
/* Connect */
oob_start_connect,
/* Query */
oob_query,
/* Finalize */
oob_finalize,
};
/* Open - registers the oob-specific MCA parameter and clamps the
 * resulting priority to the range [-1, 100] */
static void oob_open(void)
{
    mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
                           "connect_oob_priority",
                           "The selection method priority for oob",
                           false, false, oob_priority, &oob_priority);

    /* Enforce the lower bound first, then the upper bound */
    if (oob_priority < -1) {
        oob_priority = -1;
    }
    if (oob_priority > 100) {
        oob_priority = 100;
    }
}
/*
* Init function. Post non-blocking RML receive to accept incoming
* connection requests.
@ -118,6 +139,15 @@ static int oob_start_connect(mca_btl_base_endpoint_t *endpoint)
return OMPI_SUCCESS;
}
/* Query - report the oob priority when the HCA's transport type is
 * IBV_TRANSPORT_IB; report -1 for any other transport type */
static int oob_query(mca_btl_openib_hca_t *hca)
{
    if (IBV_TRANSPORT_IB != hca->ib_dev->transport_type) {
        return -1;
    }

    return oob_priority;
}
/*
* Finalize function. Cleanup RML non-blocking receive.
*/

Просмотреть файл

@ -13,9 +13,10 @@
#include "btl_openib_endpoint.h"
#include "connect/connect.h"
static int rdma_cm_open(void);
static void rdma_cm_open(void);
static int rdma_cm_init(void);
static int rdma_cm_connect(mca_btl_base_endpoint_t *e);
static int rdma_cm_query(mca_btl_openib_hca_t *hca);
static int rdma_cm_finalize(void);
ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_rdma_cm = {
@ -23,34 +24,52 @@ ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_rdma_cm = {
rdma_cm_open,
rdma_cm_init,
rdma_cm_connect,
rdma_cm_query,
rdma_cm_finalize,
};
static int rdma_cm_open(void)
static int rdma_cm_priority = -1;
/* Open - this functions sets up any rdma_cm specific commandline params */
static void rdma_cm_open(void)
{
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"btl_openib_connect_rdma_cm_foo",
"A dummy help message", false, false,
17, NULL);
"connect_rdma_cm_priority",
"The selection method priority for rdma_cm",
false, false, rdma_cm_priority, &rdma_cm_priority);
return OMPI_SUCCESS;
if (rdma_cm_priority > 100) {
rdma_cm_priority = 100;
} else if (rdma_cm_priority < -1) {
rdma_cm_priority = -1;
}
}
static int rdma_cm_init(void)
{
printf("rdma cm init\n");
BTL_ERROR(("rdma cm init"));
return OMPI_ERR_NOT_IMPLEMENTED;
}
static int rdma_cm_connect(mca_btl_base_endpoint_t *e)
{
printf("rdma cm connect\n");
BTL_ERROR(("rdma cm connect"));
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Query - for any transport type other than IBV_TRANSPORT_IWARP report
 * -1; for iWARP, log a message and report the rdma_cm priority */
static int rdma_cm_query(mca_btl_openib_hca_t *hca)
{
    if (IBV_TRANSPORT_IWARP != hca->ib_dev->transport_type) {
        return -1;
    }

    BTL_ERROR(("rdma cm Not currently supported"));
    return rdma_cm_priority;
}
static int rdma_cm_finalize(void)
{
printf("rdma cm finalize\n");
BTL_ERROR(("rdma cm finalize"));
return OMPI_ERR_NOT_IMPLEMENTED;
}

Просмотреть файл

@ -22,8 +22,10 @@
#include "btl_openib_xrc.h"
#include "connect/connect.h"
static void xoob_open(void);
static int xoob_init(void);
static int xoob_start_connect(mca_btl_base_endpoint_t *e);
static int xoob_query(mca_btl_openib_hca_t *hca);
static int xoob_finalize(void);
/*
@ -32,18 +34,18 @@ static int xoob_finalize(void);
*/
ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_xoob = {
"xoob",
/* No need for "open */
NULL,
/* Open */
xoob_open,
/* Init */
xoob_init,
/* Connect */
xoob_start_connect,
/* Query */
xoob_query,
/* Finalize */
xoob_finalize,
};
#if HAVE_XRC
typedef enum {
SEND,
RECV
@ -99,6 +101,24 @@ static void xoob_rml_recv_cb(int status, orte_process_name_t* process_name,
static int init_rem_info(mca_btl_openib_rem_info_t *rem_info);
static void free_rem_info(mca_btl_openib_rem_info_t *rem_info);
static int xoob_priority = 60;
/* Open - registers the xoob-specific MCA parameter and clamps the
 * resulting priority to the range [-1, 100] */
static void xoob_open(void)
{
    mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
                           "connect_xoob_priority",
                           "The selection method priority for xoob",
                           false, false, xoob_priority, &xoob_priority);

    /* Enforce the lower bound first, then the upper bound */
    if (xoob_priority < -1) {
        xoob_priority = -1;
    }
    if (xoob_priority > 100) {
        xoob_priority = 100;
    }
}
/*
* Init function. Post non-blocking RML receive to accept incoming
* connection requests.
@ -173,6 +193,15 @@ static int xoob_start_connect(mca_btl_base_endpoint_t *endpoint)
return rc;
}
/* Query - report the xoob priority when at least one XRC QP is
 * configured in the component; report -1 otherwise.  The hca argument
 * is not consulted. */
static int xoob_query(mca_btl_openib_hca_t *hca)
{
    if (mca_btl_openib_component.num_xrc_qps <= 0) {
        return -1;
    }

    return xoob_priority;
}
/*
* Finalize function. Cleanup RML non-blocking receive.
*/
@ -836,24 +865,3 @@ static void free_rem_info(mca_btl_openib_rem_info_t *rem_info)
free(rem_info->rem_srqs);
}
}
#else
/* In case if the XRC was disabled during compilation we will print message and return error */
static int xoob_init(void)
{
printf("xoob init\n");
return OMPI_ERR_NOT_IMPLEMENTED;
}
static int xoob_start_connect(mca_btl_base_endpoint_t *e)
{
printf("xoob start connect\n");
return OMPI_ERR_NOT_IMPLEMENTED;
}
static int xoob_finalize(void)
{
printf("xoob finalize\n");
return OMPI_ERR_NOT_IMPLEMENTED;
}
#endif

Просмотреть файл

@ -62,16 +62,23 @@
* main openib BTL will start sending out fragments that were queued
* while the connection was establing, etc.).
*/
#ifndef BTL_OPENIB_CONNECT_H
#define BTL_OPENIB_CONNECT_H
BEGIN_C_DECLS
#define BCF_MAX_NAME 64
/**
* Must forward declare mca_btl_openib_hca_t; it's defined in
* btl_openib.h, but that file includes this file.
*/
struct mca_btl_openib_hca_t;
/**
* Function to register MCA params in the connect functions
*/
typedef int (*ompi_btl_openib_connect_base_func_open_t)(void);
typedef void (*ompi_btl_openib_connect_base_func_open_t)(void);
/**
* Function to intialize the connection functions (i.e., it's been
@ -85,13 +92,16 @@ typedef int (*ompi_btl_openib_connect_base_func_init_t)(void);
typedef int (*ompi_btl_openib_connect_base_func_start_connect_t)
(struct mca_btl_base_endpoint_t *e);
/**
* Query the CPC to see if it wants to run on a specific HCA
*/
typedef int (*ompi_btl_openib_connect_base_func_query_t)(struct mca_btl_openib_hca_t *hca);
/**
* Function to finalize the connection functions
*/
typedef int (*ompi_btl_openib_connect_base_func_finalize_t)(void);
#define BCF_MAX_NAME 64
struct ompi_btl_openib_connect_base_funcs_t {
/** Name of this set of connection functions */
char bcf_name[BCF_MAX_NAME];
@ -105,8 +115,11 @@ struct ompi_btl_openib_connect_base_funcs_t {
/** Connect function */
ompi_btl_openib_connect_base_func_start_connect_t bcf_start_connect;
/** Query function */
ompi_btl_openib_connect_base_func_query_t bcf_query;
/** Finalize function */
ompi_btl_openib_connect_base_func_open_t bcf_finalize;
ompi_btl_openib_connect_base_func_finalize_t bcf_finalize;
};
typedef struct ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_base_funcs_t;

Просмотреть файл

@ -28,6 +28,7 @@ typedef struct mca_pml_v_t mca_pml_v_t;
OMPI_MODULE_DECLSPEC extern mca_pml_v_t mca_pml_v;
OMPI_DECLSPEC extern mca_pml_base_component_1_0_0_t mca_pml_v_component;
OMPI_DECLSPEC extern mca_pml_base_component_1_0_0_t mca_pml_v_component;
END_C_DECLS