
Merge in /tmp-public/cpc3 branch to trunk. oob/xoob still remains the default CPC.

This commit was SVN r18356.
This commit is contained in:
Jeff Squyres 2008-05-02 11:52:33 +00:00
parent 357428f82f
commit ba5615a18f
33 changed files with 5756 additions and 699 deletions

View file

@@ -28,6 +28,8 @@
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
# support, otherwise executes action-if-not-found
AC_DEFUN([OMPI_CHECK_OPENIB],[
OMPI_VAR_SCOPE_PUSH([$1_msg])
AC_ARG_WITH([openib],
[AC_HELP_STRING([--with-openib(=DIR)],
[Build OpenFabrics support, searching for libraries in DIR])])
@@ -99,6 +101,12 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
[$ompi_cv_func_ibv_create_cq_args],
[Number of arguments to ibv_create_cq])])])
# Set these up so that we can do an AC_DEFINE below
# (unconditionally)
$1_have_rdmacm=0
$1_have_ibcm=0
# If we have the openib stuff available, find out what we've got
AS_IF([test "$ompi_check_openib_happy" = "yes"],
[AC_CHECK_DECLS([IBV_EVENT_CLIENT_REREGISTER], [], [],
[#include <infiniband/verbs.h>])
@@ -110,8 +118,38 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
# ibv_create_xrc_rcv_qp was added in OFED 1.3
AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp], [$1_have_xrc=1])
# Do we have a recent enough RDMA CM? Need to have the
# rdma_get_peer_addr (inline) function (originally appeared
# in OFED v1.3).
AC_CHECK_HEADERS([rdma/rdma_cma.h],
[AC_CHECK_LIB([rdmacm], [rdma_create_id],
[AC_MSG_CHECKING([for rdma_get_peer_addr])
$1_msg=no
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include "rdma/rdma_cma.h"
]], [[void *ret = (void*) rdma_get_peer_addr((struct rdma_cm_id*)0);]])],
[$1_have_rdmacm=1
$1_msg=yes])
AC_MSG_RESULT([$$1_msg])])])
if test "1" = "$$1_have_rdmacm"; then
$1_LIBS="-lrdmacm $$1_LIBS"
fi
# Do we have IB CM? (note that OFED IB CM depends on RDMA
# CM, so no need to add it into the other-libraries
# argument to AC_CHECK_LIB).
AC_CHECK_HEADERS([infiniband/cm.h],
[AC_CHECK_LIB([ibcm], [ib_cm_create_id],
[$1_have_ibcm=1
$1_LIBS="-libcm $$1_LIBS"])])
])
AC_DEFINE_UNQUOTED([OMPI_HAVE_RDMACM], [$$1_have_rdmacm],
[Whether RDMA CM is available or not])
AC_DEFINE_UNQUOTED([OMPI_HAVE_IBCM], [$$1_have_ibcm],
[Whether IB CM is available or not])
CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS"
LDFLAGS="$ompi_check_openib_$1_save_LDFLAGS"
LIBS="$ompi_check_openib_$1_save_LIBS"
@@ -123,5 +161,7 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
AC_MSG_WARN([If you are using libibverbs v1.0 (i.e., OFED v1.0 or v1.1), you *MUST* have both the libsysfs headers and libraries installed. Later versions of libibverbs do not require libsysfs.])
AC_MSG_ERROR([Aborting.])])
$3])
OMPI_VAR_SCOPE_POP
])

View file

@@ -9,7 +9,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Cisco, Inc. All rights reserved.
# Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@@ -26,6 +26,7 @@ dist_amca_param_DATA = btl-openib-benchmark
dist_pkgdata_DATA = \
help-mpi-btl-openib.txt \
connect/help-mpi-btl-openib-cpc-base.txt \
mca-btl-openib-hca-params.ini
sources = \
@@ -49,22 +50,41 @@ sources = \
btl_openib_async.h \
btl_openib_xrc.c \
btl_openib_xrc.h \
btl_openib_fd.h \
btl_openib_fd.c \
connect/base.h \
connect/btl_openib_connect_base.c \
connect/btl_openib_connect_oob.c \
connect/btl_openib_connect_oob.h \
connect/btl_openib_connect_rdma_cm.c \
connect/btl_openib_connect_rdma_cm.h \
connect/btl_openib_connect_ibcm.c \
connect/btl_openib_connect_ibcm.h \
connect/btl_openib_connect_empty.c \
connect/btl_openib_connect_empty.h \
connect/connect.h
# If we have XRC support, build that CPC
if MCA_btl_openib_have_xrc
sources += \
connect/btl_openib_connect_xoob.c \
connect/btl_openib_connect_xoob.h
endif
# If we have ibcm support, build that CPC
if MCA_btl_openib_have_ibcm
sources += \
connect/btl_openib_connect_ibcm.c \
connect/btl_openib_connect_ibcm.h
dist_pkgdata_DATA += connect/help-mpi-btl-openib-cpc-ibcm.txt
endif
# If we have rdmacm support, build that CPC
if MCA_btl_openib_have_rdmacm
sources += \
connect/btl_openib_connect_rdmacm.c \
connect/btl_openib_connect_rdmacm.h
dist_pkgdata_DATA += connect/help-mpi-btl-openib-cpc-rdmacm.txt
endif
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

View file

@@ -289,6 +289,8 @@ int mca_btl_openib_add_procs(
int lcl_subnet_id_port_cnt = 0;
int btl_rank = 0;
mca_btl_base_endpoint_t* endpoint;
ompi_btl_openib_connect_base_module_t *local_cpc;
ompi_btl_openib_connect_base_module_data_t *remote_cpc_data;
for(j=0; j < mca_btl_openib_component.ib_num_btls; j++){
if(mca_btl_openib_component.openib_btls[j]->port_info.subnet_id
@@ -305,66 +307,85 @@ int mca_btl_openib_add_procs(
NULL == mca_btl_openib_component.ib_addr_table.ht_table) {
if(OPAL_SUCCESS != opal_hash_table_init(
&mca_btl_openib_component.ib_addr_table, nprocs)) {
BTL_ERROR(("XRC internal error. Failed to allocate ib_table\n"));
BTL_ERROR(("XRC internal error. Failed to allocate ib_table"));
return OMPI_ERROR;
}
}
#endif
for(i = 0; i < (int) nprocs; i++) {
for (i = 0; i < (int) nprocs; i++) {
struct ompi_proc_t* ompi_proc = ompi_procs[i];
mca_btl_openib_proc_t* ib_proc;
bool cpc_error = 0;
int remote_matching_port;
opal_output(-1, "add procs: adding proc %d", i);
if(NULL == (ib_proc = mca_btl_openib_proc_create(ompi_proc))) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
rem_subnet_id_port_cnt = 0;
/* check if the remote proc has a reachable subnet first */
BTL_VERBOSE(("got %d port_infos \n", ib_proc->proc_port_count));
for(j = 0; j < (int) ib_proc->proc_port_count; j++){
int rc;
/* Setup connect module */
rc = ompi_btl_openib_connect_base_select(ib_proc->proc_ports[j].cpclist,
openib_btl->port_info.cpclist);
if (rc != OMPI_SUCCESS) {
cpc_error = 1;
continue;
}
BTL_VERBOSE(("got a subnet %016x\n",
ib_proc->proc_ports[j].subnet_id));
if(ib_proc->proc_ports[j].subnet_id ==
openib_btl->port_info.subnet_id) {
BTL_VERBOSE(("Got a matching subnet!\n"));
rem_subnet_id_port_cnt ++;
/* check if the remote proc has any ports that:
- on the same subnet as the local proc, and
- on that subnet, has a CPC in common with the local proc
*/
remote_matching_port = -1;
rem_subnet_id_port_cnt = 0;
BTL_VERBOSE(("got %d port_infos ", ib_proc->proc_port_count));
for (j = 0; j < (int) ib_proc->proc_port_count; j++){
BTL_VERBOSE(("got a subnet %016x",
ib_proc->proc_ports[j].pm_port_info.subnet_id));
if (ib_proc->proc_ports[j].pm_port_info.subnet_id ==
openib_btl->port_info.subnet_id) {
BTL_VERBOSE(("Got a matching subnet!"));
if (rem_subnet_id_port_cnt == btl_rank) {
remote_matching_port = j;
}
rem_subnet_id_port_cnt++;
}
}
if (cpc_error) {
BTL_ERROR(("cpc_error error"));
return OMPI_ERROR;
}
if(!rem_subnet_id_port_cnt ) {
/* no use trying to communicate with this endpointlater */
BTL_VERBOSE(("No matching subnet id was found, moving on.. \n"));
if (0 == rem_subnet_id_port_cnt) {
/* no use trying to communicate with this endpoint */
BTL_VERBOSE(("No matching subnet id/CPC was found, moving on.. "));
continue;
}
#if 0
num_endpoints = rem_subnet_id_port_cnt / lcl_subnet_id_port_cnt +
(btl_rank < (rem_subnet_id_port_cnt / lcl_subnet_id_port_cnt)) ? 1:0;
#endif
if(rem_subnet_id_port_cnt < lcl_subnet_id_port_cnt &&
    btl_rank >= rem_subnet_id_port_cnt ) {
    BTL_VERBOSE(("Not enough remote ports on this subnet id, moving on.. \n"));
/* If this process has multiple ports on a single subnet ID,
   and the remote proc also has multiple ports on this same
   subnet ID, the default connection pattern is:

         LOCAL                     REMOTE PEER
   1st port on subnet X <--> 1st port on subnet X
   2nd port on subnet X <--> 2nd port on subnet X
   3rd port on subnet X <--> 3rd port on subnet X
   ...etc.

   Note that the port numbers may not be contiguous, and they
   may not be the same on either side. Hence the "1st", "2nd",
   "3rd", etc. notation, above.

   Hence, if the local "rank" of this module's port on the
   subnet ID is greater than the total number of ports on the
   peer on this same subnet, then we have no match. So skip
   this connection. */
if (rem_subnet_id_port_cnt < lcl_subnet_id_port_cnt &&
    btl_rank >= rem_subnet_id_port_cnt) {
    BTL_VERBOSE(("Not enough remote ports on this subnet id, moving on.. "));
    continue;
}
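/* (Illustrative example of the pattern above: if the local proc has 2
   ports on subnet X and the peer has 3, the module with btl_rank 0
   pairs with the peer's 1st port on X and btl_rank 1 with its 2nd;
   the peer's 3rd port simply goes unused by this proc. If the peer
   instead had only 1 port on X, the btl_rank 1 module would take the
   "continue" above and make no connection.) */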
/* Now that we have verified that we're on the same subnet and
the remote peer has enough ports, see if that specific port
on the peer has a matching CPC. */
assert(btl_rank <= ib_proc->proc_port_count);
assert(remote_matching_port != -1);
if (OMPI_SUCCESS !=
ompi_btl_openib_connect_base_find_match(openib_btl,
&(ib_proc->proc_ports[remote_matching_port]),
&local_cpc,
&remote_cpc_data)) {
continue;
}
OPAL_THREAD_LOCK(&ib_proc->proc_lock);
/* The btl_proc datastructure is shared by all IB BTL
@@ -382,7 +403,7 @@ int mca_btl_openib_add_procs(
if (MCA_BTL_XRC_ENABLED) {
int rem_port_cnt = 0;
for(j = 0; j < (int) ib_proc->proc_port_count; j++) {
if(ib_proc->proc_ports[j].subnet_id ==
if(ib_proc->proc_ports[j].pm_port_info.subnet_id ==
openib_btl->port_info.subnet_id) {
if (rem_port_cnt == btl_rank)
break;
@@ -394,7 +415,8 @@ int mca_btl_openib_add_procs(
assert(rem_port_cnt == btl_rank);
/* Push the subnet/lid/jobid to xrc hash */
rc = mca_btl_openib_ib_address_add_new(
ib_proc->proc_ports[j].lid, ib_proc->proc_ports[j].subnet_id,
ib_proc->proc_ports[j].pm_port_info.lid,
ib_proc->proc_ports[j].pm_port_info.subnet_id,
ompi_proc->proc_name.jobid, endpoint);
if (OMPI_SUCCESS != rc ) {
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
@@ -402,9 +424,13 @@ int mca_btl_openib_add_procs(
}
}
#endif
mca_btl_openib_endpoint_init(openib_btl, endpoint);
mca_btl_openib_endpoint_init(openib_btl, endpoint,
local_cpc,
&(ib_proc->proc_ports[remote_matching_port]),
remote_cpc_data);
rc = mca_btl_openib_proc_insert(ib_proc, endpoint);
if(rc != OMPI_SUCCESS) {
if (OMPI_SUCCESS != rc) {
OBJ_RELEASE(endpoint);
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
continue;
@@ -416,6 +442,19 @@ int mca_btl_openib_add_procs(
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
continue;
}
/* Tell the selected CPC that it won. NOTE: This call is
outside of / separate from mca_btl_openib_endpoint_init()
because this function likely needs the endpoint->index. */
if (NULL != local_cpc->cbm_endpoint_init) {
rc = local_cpc->cbm_endpoint_init(endpoint);
if (OMPI_SUCCESS != rc) {
OBJ_RELEASE(endpoint);
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
continue;
}
}
ompi_bitmap_set_bit(reachable, i);
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
@@ -449,11 +488,12 @@ int mca_btl_openib_del_procs(struct mca_btl_base_module_t* btl,
continue;
}
if (endpoint == del_endpoint) {
BTL_VERBOSE(("in del_procs %d, setting another endpoint to null\n",
BTL_VERBOSE(("in del_procs %d, setting another endpoint to null",
ep_index));
opal_pointer_array_set_item(openib_btl->hca->endpoints,
ep_index, NULL);
assert(((opal_object_t*)endpoint)->obj_reference_count == 1);
mca_btl_openib_proc_remove(procs[i], endpoint);
OBJ_RELEASE(endpoint);
}
}
@@ -876,7 +916,7 @@ static int mca_btl_finalize_hca(struct mca_btl_openib_hca_t *hca)
{
#if OMPI_HAVE_THREADS
int hca_to_remove;
#if OMPI_ENABLE_PROGRESS_THREADS == 1
#if OMPI_ENABLE_PROGRESS_THREADS
if(hca->progress) {
hca->progress = false;
if (pthread_cancel(hca->thread.t_handle)) {
@@ -899,6 +939,7 @@ static int mca_btl_finalize_hca(struct mca_btl_openib_hca_t *hca)
}
}
#endif
/* Release CQs */
if(hca->ib_cq[BTL_OPENIB_HP_CQ] != NULL) {
if (ibv_destroy_cq(hca->ib_cq[BTL_OPENIB_HP_CQ])) {
@@ -974,7 +1015,7 @@ int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
endpoint=opal_pointer_array_get_item(openib_btl->hca->endpoints,
ep_index);
if(!endpoint) {
BTL_VERBOSE(("In finalize, got another null endpoint\n"));
BTL_VERBOSE(("In finalize, got another null endpoint"));
continue;
}
if(endpoint->endpoint_btl != openib_btl)
@@ -987,6 +1028,16 @@ int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
}
OBJ_RELEASE(endpoint);
}
/* Finalize the CPC modules on this openib module */
for (i = 0; i < openib_btl->num_cpcs; ++i) {
if (NULL != openib_btl->cpcs[i]->cbm_finalize) {
openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]);
}
free(openib_btl->cpcs[i]);
}
free(openib_btl->cpcs);
/* Release SRQ resources */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {

View file

@@ -56,6 +56,20 @@ BEGIN_C_DECLS
#define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
/*--------------------------------------------------------------------*/
#if OMPI_ENABLE_DEBUG
#define ATTACH() do { \
int i = 0; \
opal_output(0, "WAITING TO DEBUG ATTACH"); \
while (i == 0) sleep(5); \
} while(0);
#else
#define ATTACH()
#endif
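/* (Debugging aid: a developer can drop ATTACH() into a suspect code
   path, attach a debugger to the spinning process -- e.g. "gdb -p
   <pid>" -- set i to a nonzero value, and continue.) */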
/*--------------------------------------------------------------------*/
/**
* Infiniband (IB) BTL component.
*/
@@ -224,37 +238,31 @@ OMPI_MODULE_DECLSPEC extern mca_btl_openib_component_t mca_btl_openib_component;
typedef mca_btl_base_recv_reg_t mca_btl_openib_recv_reg_t;
struct mca_btl_openib_port_info {
    uint32_t mtu;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
    uint8_t padding[4];
#endif
    uint64_t subnet_id;
    uint16_t lid; /* used only in xrc */
    uint16_t apm_lid; /* the lid is used for APM to different port */
    char *cpclist;
};
typedef struct mca_btl_openib_port_info mca_btl_openib_port_info_t;

/**
 * Common information for all ports that is sent in the modex message
 */
typedef struct mca_btl_openib_modex_message_t {
    /** The subnet ID of this port */
    uint64_t subnet_id;
    /** LID of this port */
    uint16_t lid;
    /** APM LID for this port */
    uint16_t apm_lid;
    /** The MTU used by this port */
    uint8_t mtu;
    /** Dummy field used to calculate the real length */
    uint8_t end;
} mca_btl_openib_modex_message_t;
#if HAVE_XRC
#define MCA_BTL_OPENIB_LID_NTOH(hdr) (hdr).lid = ntohs((hdr).lid)
#define MCA_BTL_OPENIB_LID_HTON(hdr) (hdr).lid = htons((hdr).lid)
#else
#define MCA_BTL_OPENIB_LID_NTOH(hdr)
#define MCA_BTL_OPENIB_LID_HTON(hdr)
#endif
#define MCA_BTL_OPENIB_PORT_INFO_NTOH(hdr) \
    do { \
        (hdr).mtu = ntohl((hdr).mtu); \
        (hdr).subnet_id = ntoh64((hdr).subnet_id); \
        MCA_BTL_OPENIB_LID_NTOH(hdr); \
    } while (0)
#define MCA_BTL_OPENIB_PORT_INFO_HTON(hdr) \
    do { \
        (hdr).mtu = htonl((hdr).mtu); \
        (hdr).subnet_id = hton64((hdr).subnet_id); \
        MCA_BTL_OPENIB_LID_HTON(hdr); \
    } while (0)
#define MCA_BTL_OPENIB_MODEX_MSG_NTOH(hdr) \
    do { \
        (hdr).subnet_id = ntoh64((hdr).subnet_id); \
        (hdr).lid = ntohs((hdr).lid); \
    } while (0)
#define MCA_BTL_OPENIB_MODEX_MSG_HTON(hdr) \
    do { \
        (hdr).subnet_id = hton64((hdr).subnet_id); \
        (hdr).lid = htons((hdr).lid); \
    } while (0)
typedef struct mca_btl_openib_hca_qp_t {
@@ -328,9 +336,20 @@ struct mca_btl_openib_module_qp_t {
* IB BTL Interface
*/
struct mca_btl_openib_module_t {
mca_btl_base_module_t super; /**< base BTL interface */
/* Base BTL module */
mca_btl_base_module_t super;
bool btl_inited;
mca_btl_openib_port_info_t port_info; /* contains only the subnet id right now */
/** Common information about all ports */
mca_btl_openib_modex_message_t port_info;
/** Array of CPCs on this port */
ompi_btl_openib_connect_base_module_t **cpcs;
/** Number of elements in the cpcs array */
uint8_t num_cpcs;
mca_btl_openib_hca_t *hca;
uint8_t port_num; /**< ID of the PORT */
uint16_t pkey_index;

View file

@@ -107,6 +107,7 @@ static mca_btl_openib_endpoint_t * qp2endpoint(struct ibv_qp *qp, mca_btl_openib
return NULL;
}
#if HAVE_XRC
/* XRC receive QP to endpoint */
static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_openib_hca_t *hca)
{
@@ -119,6 +120,7 @@ static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_open
}
return NULL;
}
#endif
/* Function inits mca_btl_openib_async_poll */
static int btl_openib_async_poll_init(struct mca_btl_openib_async_poll *hcas_poll)
@@ -194,7 +196,7 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *hcas_poll
}
}
if (!fd_found) {
BTL_ERROR(("Requested FD[%d] was not found in poll array\n",fd));
BTL_ERROR(("Requested FD[%d] was not found in poll array",fd));
return OMPI_ERROR;
}
}
@@ -402,8 +404,8 @@ static int apm_update_port(mca_btl_openib_endpoint_t *ep,
}
/* looking for alternative lid on remote side */
for(port_i = 0; port_i < ep->endpoint_proc->proc_port_count; port_i++) {
if (ep->endpoint_proc->proc_ports[port_i].lid == attr->ah_attr.dlid - mca_btl_openib_component.apm_lmc) {
apm_lid = ep->endpoint_proc->proc_ports[port_i].apm_lid;
if (ep->endpoint_proc->proc_ports[port_i].pm_port_info.lid == attr->ah_attr.dlid - mca_btl_openib_component.apm_lmc) {
apm_lid = ep->endpoint_proc->proc_ports[port_i].pm_port_info.apm_lid;
}
}
if (0 == apm_lid) {

View file

@@ -66,6 +66,7 @@
#include "btl_openib_ini.h"
#include "btl_openib_mca.h"
#include "btl_openib_xrc.h"
#include "btl_openib_fd.h"
#if OMPI_HAVE_THREADS
#include "btl_openib_async.h"
#endif
@@ -145,84 +146,161 @@ int btl_openib_component_open(void)
static int btl_openib_component_close(void)
{
/* Close down the connect pseudo component */
if (NULL != ompi_btl_openib_connect.bcf_finalize) {
ompi_btl_openib_connect.bcf_finalize();
}
ompi_btl_openib_connect_base_finalize();
ompi_btl_openib_fd_finalize();
ompi_btl_openib_ini_finalize();
return OMPI_SUCCESS;
}
static void inline pack8(char **dest, uint8_t value)
{
/* Copy one character */
**dest = (char) value;
/* Move the dest ahead one */
++*dest;
}
/*
* Register OPENIB port information. The MCA framework
* will make this available to all peers.
* Register local openib port information with the modex so that it
* can be shared with all other peers.
*/
static int btl_openib_modex_send(void)
{
int rc, i;
int rc, i, j;
int modex_message_size;
mca_btl_openib_modex_message_t dummy;
char *message, *offset;
uint32_t size, size_save;
size_t msg_size;
size_t size, msg_size;
ompi_btl_openib_connect_base_module_t *cpc;
/* The message is packed into 2 parts:
* 1. a uint32_t indicating the number of ports in the message
* 2. for each port:
* a. the port data
* b. a uint32_t indicating a string length
* c. the string cpc list for that port, length specified by 2b.
*/
msg_size = sizeof(uint32_t) + mca_btl_openib_component.ib_num_btls * (sizeof(uint32_t) + sizeof(mca_btl_openib_port_info_t));
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
msg_size += strlen(mca_btl_openib_component.openib_btls[i]->port_info.cpclist);
}
if (0 == msg_size) {
opal_output(-1, "Starting to modex send");
if (0 == mca_btl_openib_component.ib_num_btls) {
return 0;
}
modex_message_size = ((char *) &(dummy.end)) - ((char*) &dummy);
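/* (The dummy "end" field marks the end of the meaningful fields, so
   this measures only the bytes worth sending and excludes any trailing
   padding the compiler may add to the struct.) */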
/* The message is packed into multiple parts:
* 1. a uint8_t indicating the number of modules (ports) in the message
* 2. for each module:
* a. the common module data
* b. a uint8_t indicating how many CPCs follow
* c. for each CPC:
* a. a uint8_t indicating the index of the CPC in the all[]
* array in btl_openib_connect_base.c
* b. a uint8_t indicating the priority of this CPC
* c. a uint8_t indicating the length of the blob to follow
* d. a blob that is only meaningful to that CPC
*/
msg_size =
/* uint8_t for number of modules in the message */
1 +
/* For each module: */
mca_btl_openib_component.ib_num_btls *
(
/* Common module data */
modex_message_size +
/* uint8_t for how many CPCs follow */
1
);
/* For each module, add in the size of the per-CPC data */
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
for (j = 0;
j < mca_btl_openib_component.openib_btls[i]->num_cpcs;
++j) {
msg_size +=
/* uint8_t for the index of the CPC */
1 +
/* uint8_t for the CPC's priority */
1 +
/* uint8_t for the blob length */
1 +
/* blob length */
mca_btl_openib_component.openib_btls[i]->cpcs[j]->data.cbm_modex_message_len;
}
}
message = malloc(msg_size);
if (NULL == message) {
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
BTL_ERROR(("Failed malloc"));
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Pack the number of ports */
size = mca_btl_openib_component.ib_num_btls;
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
size = htonl(size);
#endif
memcpy(message, &size, sizeof(size));
offset = message + sizeof(size);
/* Pack the number of modules */
offset = message;
pack8(&offset, mca_btl_openib_component.ib_num_btls);
opal_output(-1, "modex sending %d btls (packed: %d, offset now at %d)", mca_btl_openib_component.ib_num_btls, *((uint8_t*) message), (int) (offset - message));
/* Pack each of the ports */
/* Pack each of the modules */
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
/* Pack the port struct */
memcpy(offset, &mca_btl_openib_component.openib_btls[i]->port_info, sizeof(mca_btl_openib_port_info_t));
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
MCA_BTL_OPENIB_PORT_INFO_HTON(*(mca_btl_openib_port_info_t *)offset);
#endif
offset += sizeof(mca_btl_openib_port_info_t);
/* Pack the strlen of the cpclist */
size = size_save =
    strlen(mca_btl_openib_component.openib_btls[i]->port_info.cpclist);
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
size = htonl(size);
#endif
memcpy(offset, &size, sizeof(size));
offset += sizeof(size);
/* Pack the string */
memcpy(offset,
    mca_btl_openib_component.openib_btls[i]->port_info.cpclist,
    size_save);
offset += size_save;
/* Pack the modex common message struct. */
size = modex_message_size;
memcpy(offset,
    &(mca_btl_openib_component.openib_btls[i]->port_info),
    size);
opal_output(-1, "modex packed btl port modex message: %lx, %d, %d (size: %d)",
    mca_btl_openib_component.openib_btls[i]->port_info.subnet_id,
    mca_btl_openib_component.openib_btls[i]->port_info.mtu,
    mca_btl_openib_component.openib_btls[i]->port_info.lid,
    (int) size);
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
MCA_BTL_OPENIB_MODEX_MSG_HTON(*(mca_btl_openib_modex_message_t *)offset);
#endif
offset += size;
opal_output(-1, "modex packed btl %d: modex message, offset now %d",
    i, (int) (offset - message));
/* Pack the number of CPCs that follow */
pack8(&offset,
mca_btl_openib_component.openib_btls[i]->num_cpcs);
opal_output(-1, "modex packed btl %d: to pack %d cpcs (packed: %d, offset now %d)",
i, mca_btl_openib_component.openib_btls[i]->num_cpcs,
*((uint8_t*) (offset - 1)), (int) (offset-message));
/* Pack each CPC */
for (j = 0;
j < mca_btl_openib_component.openib_btls[i]->num_cpcs;
++j) {
uint8_t u8;
cpc = mca_btl_openib_component.openib_btls[i]->cpcs[j];
opal_output(-1, "modex packed btl %d: packing cpc %s",
i, cpc->data.cbm_component->cbc_name);
/* Pack the CPC index */
u8 = ompi_btl_openib_connect_base_get_cpc_index(cpc->data.cbm_component);
pack8(&offset, u8);
opal_output(-1, "packing btl %d: cpc %d: index %d (packed %d, offset now %d)",
i, j, u8, *((uint8_t*) (offset-1)), (int)(offset-message));
/* Pack the CPC priority */
pack8(&offset, cpc->data.cbm_priority);
opal_output(-1, "packing btl %d: cpc %d: priority %d (packed %d, offset now %d)",
i, j, cpc->data.cbm_priority, *((uint8_t*) (offset-1)), (int)(offset-message));
/* Pack the blob length */
u8 = cpc->data.cbm_modex_message_len;
pack8(&offset, u8);
opal_output(-1, "packing btl %d: cpc %d: message len %d (packed %d, offset now %d)",
i, j, u8, *((uint8_t*) (offset-1)), (int)(offset-message));
/* If the blob length is > 0, pack the blob */
if (u8 > 0) {
memcpy(offset, cpc->data.cbm_modex_message, u8);
offset += u8;
opal_output(-1, "packing btl %d: cpc %d: blob packed %d %x (offset now %d)",
i, j,
((uint32_t*)cpc->data.cbm_modex_message)[0],
((uint32_t*)cpc->data.cbm_modex_message)[1],
(int)(offset-message));
}
/* Sanity check */
assert((size_t) (offset - message) <= msg_size);
}
}
/* All done -- send it! */
rc = ompi_modex_send(&mca_btl_openib_component.super.btl_version,
message, msg_size);
free(message);
opal_output(-1, "Modex sent! %d calculated, %d actual\n", (int) msg_size, (int) (offset - message));
return rc;
}
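For reference, a minimal receive-side sketch of the format packed above; this is an illustration, not the verbatim unpacking code (which lives in btl_openib_proc.c). It assumes unpack8() is the inverse of pack8() and that modex_message_size was computed the same way as in btl_openib_modex_send():

static void sketch_unpack_modex(char *offset, int modex_message_size)
{
    uint8_t i, j, num_btls, num_cpcs, cpc_index, priority, blob_len;
    mca_btl_openib_modex_message_t msg;

    /* 1. Number of modules (ports) in the message */
    unpack8(&offset, &num_btls);
    for (i = 0; i < num_btls; ++i) {
        /* 2a. Common module data, byte-swapped where the sender
           applied MCA_BTL_OPENIB_MODEX_MSG_HTON */
        memcpy(&msg, offset, modex_message_size);
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
        MCA_BTL_OPENIB_MODEX_MSG_NTOH(msg);
#endif
        offset += modex_message_size;
        /* 2b. Number of CPCs that follow */
        unpack8(&offset, &num_cpcs);
        for (j = 0; j < num_cpcs; ++j) {
            unpack8(&offset, &cpc_index); /* 2c-a. index into all[] */
            unpack8(&offset, &priority);  /* 2c-b. this CPC's priority */
            unpack8(&offset, &blob_len);  /* 2c-c. length of the blob */
            offset += blob_len;           /* 2c-d. the CPC-private blob */
        }
    }
}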
@@ -253,7 +331,7 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
case MCA_BTL_OPENIB_CONTROL_RDMA:
rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)ctl_hdr;
BTL_VERBOSE(("prior to NTOH received rkey %lu, rdma_start.lval %llu, pval %p, ival %u\n",
BTL_VERBOSE(("prior to NTOH received rkey %lu, rdma_start.lval %llu, pval %p, ival %u",
rdma_hdr->rkey,
(unsigned long) rdma_hdr->rdma_start.lval,
rdma_hdr->rdma_start.pval,
@@ -265,7 +343,7 @@
}
BTL_VERBOSE(("received rkey %lu, rdma_start.lval %llu, pval %p,"
" ival %u\n", rdma_hdr->rkey,
" ival %u", rdma_hdr->rkey,
(unsigned long) rdma_hdr->rdma_start.lval,
rdma_hdr->rdma_start.pval, rdma_hdr->rdma_start.ival));
@@ -330,7 +408,7 @@ static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
if(openib_reg->mr != NULL) {
if(ibv_dereg_mr(openib_reg->mr)) {
BTL_ERROR(("%s: error unpinning openib memory errno says %s\n",
BTL_ERROR(("%s: error unpinning openib memory errno says %s",
__func__, strerror(errno)));
return OMPI_ERROR;
}
@@ -382,8 +460,21 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
uint64_t subnet_id;
ibv_query_gid(hca->ib_dev_context, port_num, 0, &gid);
/* If we have struct ibv_device.transport_type, then we're >= OFED
v1.2, and it could be iWarp or IB. If we don't have that
member, then we're < OFED v1.2, and it can only be IB. */
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
if (IBV_TRANSPORT_IWARP == hca->ib_dev->transport_type) {
subnet_id = 0;
} else {
subnet_id = ntoh64(gid.global.subnet_prefix);
}
#else
subnet_id = ntoh64(gid.global.subnet_prefix);
#endif
BTL_VERBOSE(("my subnet_id is %016x\n", subnet_id));
BTL_VERBOSE(("my subnet_id is %016x", subnet_id));
if(mca_btl_openib_component.ib_num_btls > 0 &&
IB_DEFAULT_GID_PREFIX == subnet_id &&
@@ -401,7 +492,10 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
}
#if OMPI_HAVE_THREADS
/* APM support */
/* APM support -- only meaningful if async event support is
enabled. If async events are not enabled, then there's nothing
to listen for the APM event to load the new path, so it's not
worth enabling APM. */
if (lmc > 1){
if (-1 == mca_btl_openib_component.apm_lmc) {
lmc_step = lmc;
@@ -430,7 +524,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
openib_btl = malloc(sizeof(mca_btl_openib_module_t));
if(NULL == openib_btl) {
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
return OMPI_ERR_OUT_OF_RESOURCE;
}
memcpy(openib_btl, &mca_btl_openib_module,
@@ -445,15 +539,22 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
openib_btl->lid = lid;
openib_btl->apm_port = 0;
openib_btl->src_path_bits = lid - ib_port_attr->lid;
/* store the subnet for multi-nic support */
openib_btl->port_info.subnet_id = subnet_id;
openib_btl->port_info.mtu = hca->mtu;
/* This code is protected with ifdef because we don't want to send
* extra bytes during OOB */
openib_btl->port_info.lid = lid;
rc = ompi_btl_openib_connect_base_query(&openib_btl->port_info.cpclist, hca);
if (OMPI_SUCCESS != rc) {
openib_btl->cpcs = NULL;
openib_btl->num_cpcs = 0;
/* Do we have at least one CPC that can handle this
port? */
rc =
ompi_btl_openib_connect_base_select_for_local_port(openib_btl);
if (OMPI_ERR_NOT_SUPPORTED == rc) {
continue;
} else if (OMPI_SUCCESS != rc) {
return rc;
}
mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbfunc = btl_openib_control;
@@ -569,7 +670,7 @@ static void hca_construct(mca_btl_openib_hca_t *hca)
hca->ib_dev_context = NULL;
hca->ib_pd = NULL;
hca->mpool = NULL;
#if OMPI_ENABLE_PROGRESS_THREADS == 1
#if OMPI_ENABLE_PROGRESS_THREADS
hca->ib_channel = NULL;
#endif
hca->btls = 0;
@@ -658,7 +759,7 @@ static int prepare_hca_for_use(mca_btl_openib_hca_t *hca)
calloc(mca_btl_openib_component.max_eager_rdma * hca->btls,
sizeof(mca_btl_openib_endpoint_t*));
if(NULL == hca->eager_rdma_buffers) {
BTL_ERROR(("Memory allocation fails\n"));
BTL_ERROR(("Memory allocation fails"));
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
@@ -895,26 +996,27 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
hca = OBJ_NEW(mca_btl_openib_hca_t);
if(NULL == hca){
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
return OMPI_ERR_OUT_OF_RESOURCE;
}
hca->ib_dev = ib_dev;
hca->ib_dev_context = ibv_open_device(ib_dev);
hca->ib_pd = NULL;
hca->hca_btls = OBJ_NEW(opal_pointer_array_t);
if (OPAL_SUCCESS != opal_pointer_array_init(hca->hca_btls, 2, INT_MAX, 2)) {
BTL_ERROR(("Failed to initialize hca_btls array: %s:%d\n", __FILE__, __LINE__));
BTL_ERROR(("Failed to initialize hca_btls array: %s:%d", __FILE__, __LINE__));
return OMPI_ERR_OUT_OF_RESOURCE;
}
if(NULL == hca->ib_dev_context){
BTL_ERROR(("error obtaining device context for %s errno says %s\n",
BTL_ERROR(("error obtaining device context for %s errno says %s",
ibv_get_device_name(ib_dev), strerror(errno)));
goto error;
}
if(ibv_query_device(hca->ib_dev_context, &hca->ib_dev_attr)){
BTL_ERROR(("error obtaining device attributes for %s errno says %s\n",
BTL_ERROR(("error obtaining device attributes for %s errno says %s",
ibv_get_device_name(ib_dev), strerror(errno)));
goto error;
}
@@ -991,7 +1093,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
hca->mtu = IBV_MTU_4096;
break;
default:
BTL_ERROR(("invalid MTU value specified in INI file (%d); ignored\n", values.mtu));
BTL_ERROR(("invalid MTU value specified in INI file (%d); ignored", values.mtu));
hca->mtu = mca_btl_openib_component.ib_mtu;
break;
}
@@ -1007,7 +1109,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
/* Allocate the protection domain for the HCA */
hca->ib_pd = ibv_alloc_pd(hca->ib_dev_context);
if(NULL == hca->ib_pd){
BTL_ERROR(("error allocating protection domain for %s errno says %s\n",
BTL_ERROR(("error allocating protection domain for %s errno says %s",
ibv_get_device_name(ib_dev), strerror(errno)));
goto error;
}
@@ -1029,15 +1131,15 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
mca_mpool_base_module_create(mca_btl_openib_component.ib_mpool_name,
hca, &mpool_resources);
if(NULL == hca->mpool){
BTL_ERROR(("error creating IB memory pool for %s errno says %s\n",
BTL_ERROR(("error creating IB memory pool for %s errno says %s",
ibv_get_device_name(ib_dev), strerror(errno)));
goto error;
}
#if OMPI_ENABLE_PROGRESS_THREADS == 1
#if OMPI_ENABLE_PROGRESS_THREADS
hca->ib_channel = ibv_create_comp_channel(hca->ib_dev_context);
if (NULL == hca->ib_channel) {
BTL_ERROR(("error creating channel for %s errno says %s\n",
BTL_ERROR(("error creating channel for %s errno says %s",
ibv_get_device_name(hca->ib_dev),
strerror(errno)));
goto error;
@@ -1100,7 +1202,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
}
error:
#if defined(OMPI_HAVE_THREADS) && OMPI_ENABLE_PROGRESS_THREADS == 1
#if OMPI_ENABLE_PROGRESS_THREADS
if (hca->ib_channel) {
ibv_destroy_comp_channel(hca->ib_channel);
}
@@ -1191,7 +1293,7 @@ static struct ibv_device **ibv_get_device_list_compat(int *num_devs)
ib_devs = (struct ibv_device**)malloc(*num_devs * sizeof(struct ibv_dev*));
if(NULL == ib_devs) {
*num_devs = 0;
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
return NULL;
}
@@ -1335,8 +1437,13 @@ btl_openib_component_init(int *num_btl_modules,
if (OMPI_SUCCESS != (ret = ompi_btl_openib_ini_init())) {
goto no_btls;
}
/* Init CPC components */
if (OMPI_SUCCESS != (ret = ompi_btl_openib_connect_base_init())) {
goto no_btls;
}
if(MCA_BTL_XRC_ENABLED) {
if (MCA_BTL_XRC_ENABLED) {
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_addr_table,
opal_hash_table_t);
}
@@ -1435,6 +1542,11 @@ btl_openib_component_init(int *num_btl_modules,
opal_argv_copy(mca_btl_openib_component.if_exclude_list);
}
/* Initialize FD listening */
if (OMPI_SUCCESS != ompi_btl_openib_fd_init()) {
goto no_btls;
}
ib_devs = ibv_get_device_list_compat(&num_devs);
if(0 == num_devs || NULL == ib_devs) {
@@ -1499,14 +1611,14 @@ btl_openib_component_init(int *num_btl_modules,
malloc(sizeof(mca_btl_openib_module_t*) *
mca_btl_openib_component.ib_num_btls);
if(NULL == mca_btl_openib_component.openib_btls) {
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
return NULL;
}
btls = (struct mca_btl_base_module_t **)
malloc(mca_btl_openib_component.ib_num_btls *
sizeof(struct mca_btl_base_module_t*));
if(NULL == btls) {
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
return NULL;
}
@@ -1542,8 +1654,11 @@ btl_openib_component_init(int *num_btl_modules,
/* If we fail early enough in the setup, we just modex around that
there are no openib BTL's in this process and return NULL. */
if (MCA_BTL_XRC_ENABLED)
if (MCA_BTL_XRC_ENABLED) {
OBJ_DESTRUCT(&mca_btl_openib_component.ib_addr_table);
}
/* Be sure to shut down the fd listener */
ompi_btl_openib_fd_finalize();
mca_btl_openib_component.ib_num_btls = 0;
btl_openib_modex_send();
@@ -1997,12 +2112,12 @@ static int poll_hca(mca_btl_openib_hca_t* hca, int count)
return count;
error:
BTL_ERROR(("error polling %s with %d errno says %s\n", cq_name[cq], ne,
BTL_ERROR(("error polling %s with %d errno says %s", cq_name[cq], ne,
strerror(errno)));
return count;
}
#if OMPI_ENABLE_PROGRESS_THREADS == 1
#if OMPI_ENABLE_PROGRESS_THREADS
void* mca_btl_openib_progress_thread(opal_object_t* arg)
{
opal_thread_t* thread = (opal_thread_t*)arg;
@@ -2014,7 +2129,7 @@ void* mca_btl_openib_progress_thread(opal_object_t* arg)
pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, NULL );
pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, NULL );
opal_output(0, "WARNING: the openib btl progress thread code *does not yet work*. Your run is likely to hang, crash, break the kitchen sink, and/or eat your cat. You have been warned.");
opal_output(-1, "WARNING: the openib btl progress thread code *does not yet work*. Your run is likely to hang, crash, break the kitchen sink, and/or eat your cat. You have been warned.");
while (hca->progress) {
while(opal_progress_threads()) {

View file

@@ -42,6 +42,7 @@
#include "btl_openib_proc.h"
#include "btl_openib_xrc.h"
#include "btl_openib_async.h"
#include "connect/connect.h"
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint);
@@ -350,8 +351,9 @@ static void endpoint_init_qp(mca_btl_base_endpoint_t *ep, const int qp)
endpoint_init_qp_srq(ep_qp, qp);
break;
case MCA_BTL_OPENIB_XRC_QP:
if(NULL == ep->ib_addr->qp)
if (NULL == ep->ib_addr->qp) {
ep->ib_addr->qp = endpoint_alloc_qp();
}
endpoint_init_qp_xrc(ep, qp);
break;
default:
@@ -361,7 +363,10 @@ static void endpoint_init_qp(mca_btl_base_endpoint_t *ep, const int qp)
}
void mca_btl_openib_endpoint_init(mca_btl_openib_module_t *btl,
mca_btl_base_endpoint_t *ep)
mca_btl_base_endpoint_t *ep,
ompi_btl_openib_connect_base_module_t *local_cpc,
mca_btl_openib_proc_modex_t *remote_proc_info,
ompi_btl_openib_connect_base_module_data_t *remote_cpc_data)
{
int qp;
@@ -369,9 +374,20 @@ void mca_btl_openib_endpoint_init(mca_btl_openib_module_t *btl,
ep->use_eager_rdma = btl->hca->use_eager_rdma &
mca_btl_openib_component.use_eager_rdma;
ep->subnet_id = btl->port_info.subnet_id;
ep->endpoint_local_cpc = local_cpc;
ep->endpoint_remote_cpc_data = remote_cpc_data;
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++)
ep->rem_info.rem_lid = remote_proc_info->pm_port_info.lid;
ep->rem_info.rem_subnet_id = remote_proc_info->pm_port_info.subnet_id;
ep->rem_info.rem_mtu = remote_proc_info->pm_port_info.mtu;
opal_output(-1, "Got remote LID, subnet, MTU: %d, %lx, %d",
ep->rem_info.rem_lid,
ep->rem_info.rem_subnet_id,
ep->rem_info.rem_mtu);
for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
endpoint_init_qp(ep, qp);
}
}
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
@@ -397,6 +413,8 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
endpoint->xrc_recv_qp_num = 0;
endpoint->endpoint_btl = 0;
endpoint->endpoint_proc = 0;
endpoint->endpoint_local_cpc = NULL;
endpoint->endpoint_remote_cpc_data = NULL;
endpoint->endpoint_tstamp = 0.0;
endpoint->endpoint_state = MCA_BTL_IB_CLOSED;
endpoint->endpoint_retries = 0;
@@ -434,6 +452,11 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
bool pval_clean = false;
int qp;
/* If the CPC has an endpoint_finalize function, call it */
if (NULL != endpoint->endpoint_local_cpc->cbm_endpoint_finalize) {
endpoint->endpoint_local_cpc->cbm_endpoint_finalize(endpoint);
}
/* Release memory resources */
do {
/* Make sure that mca_btl_openib_endpoint_connect_eager_rdma ()
@@ -480,6 +503,7 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
/* free the qps */
free(endpoint->qps);
endpoint->qps = NULL;
/* unregister xrc recv qp */
#if HAVE_XRC
@@ -548,7 +572,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
}
/* Run over all qps and load alternative path */
#if OMPI_HAVE_THREADS
#if OMPI_HAVE_THREAD_SUPPORT
if (APM_ENABLED) {
int i;
if (MCA_BTL_XRC_ENABLED) {
@@ -589,7 +613,9 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
while(master && !opal_list_is_empty(&endpoint->ib_addr->pending_ep)) {
ep_item = opal_list_remove_first(&endpoint->ib_addr->pending_ep);
ep = (mca_btl_openib_endpoint_t *)ep_item;
if (OMPI_SUCCESS != ompi_btl_openib_connect.bcf_start_connect(ep)) {
if (OMPI_SUCCESS !=
endpoint->endpoint_local_cpc->cbm_start_connect(endpoint->endpoint_local_cpc,
ep)) {
BTL_ERROR(("Failed to connect pending endpoint\n"));
}
}

View file

@@ -39,6 +39,7 @@
BEGIN_C_DECLS
struct mca_btl_openib_frag_t;
struct mca_btl_openib_proc_modex_t;
/**
* State of IB endpoint connection.
@@ -68,37 +69,38 @@ typedef enum {
MCA_BTL_IB_FAILED
} mca_btl_openib_endpoint_state_t;
struct mca_btl_openib_rem_qp_info_t {
typedef struct mca_btl_openib_rem_qp_info_t {
uint32_t rem_qp_num;
/* Remote QP number */
uint32_t rem_psn;
/* Remote processes port sequence number */
}; typedef struct mca_btl_openib_rem_qp_info_t mca_btl_openib_rem_qp_info_t;
} mca_btl_openib_rem_qp_info_t;
struct mca_btl_openib_rem_srq_info_t {
typedef struct mca_btl_openib_rem_srq_info_t {
/* Remote SRQ number */
uint32_t rem_srq_num;
}; typedef struct mca_btl_openib_rem_srq_info_t mca_btl_openib_rem_srq_info_t;
} mca_btl_openib_rem_srq_info_t;
struct mca_btl_openib_rem_info_t {
uint16_t rem_lid;
typedef struct mca_btl_openib_rem_info_t {
/* Local identifier of the remote process */
uint64_t rem_subnet_id;
uint16_t rem_lid;
/* subnet id of remote process */
uint32_t rem_mtu;
uint64_t rem_subnet_id;
/* MTU of remote process */
uint32_t rem_index;
uint32_t rem_mtu;
/* index of remote endpoint in endpoint array */
uint32_t rem_index;
/* Remote QPs */
mca_btl_openib_rem_qp_info_t *rem_qps;
/* remote xrc_srq info , used only with xrc connections */
/* Remote xrc_srq info, used only with XRC connections */
mca_btl_openib_rem_srq_info_t *rem_srqs;
}; typedef struct mca_btl_openib_rem_info_t mca_btl_openib_rem_info_t;
} mca_btl_openib_rem_info_t;
/**
* Aggregates all per peer qp info for an endpoint
*/
struct mca_btl_openib_endpoint_pp_qp_t {
typedef struct mca_btl_openib_endpoint_pp_qp_t {
int32_t sd_credits; /**< this rank's view of the credits
* available for sending:
* this is the credits granted by the
@@ -110,15 +112,15 @@ struct mca_btl_openib_endpoint_pp_qp_t {
int32_t cm_received; /**< Credit messages received */
int32_t cm_return; /**< how may credits to return */
int32_t cm_sent; /**< Outstanding number of credit messages */
}; typedef struct mca_btl_openib_endpoint_pp_qp_t mca_btl_openib_endpoint_pp_qp_t;
} mca_btl_openib_endpoint_pp_qp_t;
/**
* Aggregates all srq qp info for an endpoint
*/
struct mca_btl_openib_endpoint_srq_qp_t {
typedef struct mca_btl_openib_endpoint_srq_qp_t {
int32_t dummy;
}; typedef struct mca_btl_openib_endpoint_srq_qp_t mca_btl_openib_endpoint_srq_qp_t;
} mca_btl_openib_endpoint_srq_qp_t;
typedef struct mca_btl_openib_qp_t {
struct ibv_qp *lcl_qp;
@@ -152,61 +154,70 @@ typedef struct mca_btl_openib_endpoint_qp_t {
struct mca_btl_base_endpoint_t {
opal_list_item_t super;
/** BTL module that created this connection */
struct mca_btl_openib_module_t* endpoint_btl;
/**< BTL instance that created this connection */
/** proc structure corresponding to endpoint */
struct mca_btl_openib_proc_t* endpoint_proc;
/**< proc structure corresponding to endpoint */
/** local CPC to connect to this endpoint */
ompi_btl_openib_connect_base_module_t *endpoint_local_cpc;
/** hook for local CPC to hang endpoint-specific data */
void *endpoint_local_cpc_data;
/** pointer to remote CPC's data (essentially its CPC modex message) */
ompi_btl_openib_connect_base_module_data_t *endpoint_remote_cpc_data;
/** current state of the connection */
mca_btl_openib_endpoint_state_t endpoint_state;
/**< current state of the connection */
/** number of connection retries attempted */
size_t endpoint_retries;
/**< number of connection retries attempted */
/** timestamp of when the first connection was attempted */
double endpoint_tstamp;
/**< timestamp of when the first connection was attempted */
/** lock for concurrent access to endpoint state */
opal_mutex_t endpoint_lock;
/**< lock for concurrent access to endpoint state */
/** list of pending frags due to lazy connection establishment
for this endpoint */
opal_list_t pending_lazy_frags;
/**< list of pending frags due to lazy connection establishment
* for this endpotint
*/
mca_btl_openib_endpoint_qp_t *qps;
uint32_t xrc_recv_qp_num; /* in xrc we will use it as recv qp */
uint32_t xrc_recv_psn;
/** list of pending rget ops */
opal_list_t pending_get_frags;
/** list of pending rput ops */
opal_list_t pending_put_frags;
opal_list_t pending_get_frags; /**< list of pending rget ops */
opal_list_t pending_put_frags; /**< list of pending rput ops */
/** number of available get tokens */
int32_t get_tokens;
/** subnet id of this endpoint*/
uint64_t subnet_id;
/** used only for xrc; pointer to struct that keeps remote port
info */
struct ib_address_t *ib_addr;
/* Local processes port sequence number (Low and High) */
int32_t get_tokens; /**< number of available get tokens */
uint64_t subnet_id; /**< subnet id of this endpoint*/
struct ib_address_t *ib_addr; /**< used only for xrc; pointer to struct
that keeps remote port info */
int32_t eager_recv_count; /**< number of eager received */
/** number of eager received */
int32_t eager_recv_count;
/** info about remote RDMA buffer */
mca_btl_openib_eager_rdma_remote_t eager_rdma_remote;
/**< info about remote RDMA buffer */
/** info about local RDMA buffer */
mca_btl_openib_eager_rdma_local_t eager_rdma_local;
/**< info about local RDMA buffer */
int32_t index; /**< index of the endpoint in endpoints array */
/** index of the endpoint in endpoints array */
int32_t index;
/**< frags for sending explicit high priority credits */
bool nbo; /**< does the endpoint require network byte ordering? */
bool use_eager_rdma; /**< use eager rdma for this peer? */
/** does the endpoint require network byte ordering? */
bool nbo;
/** use eager rdma for this peer? */
bool use_eager_rdma;
/** information about the remote port */
mca_btl_openib_rem_info_t rem_info;
};
@@ -234,12 +245,15 @@ void mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t*);
int mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t*);
void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t*);
void mca_btl_openib_endpoint_init(mca_btl_openib_module_t*,
mca_btl_base_endpoint_t*);
mca_btl_base_endpoint_t*,
ompi_btl_openib_connect_base_module_t *local_cpc,
struct mca_btl_openib_proc_modex_t *remote_proc_info,
ompi_btl_openib_connect_base_module_data_t *remote_cpc_data);
static inline int post_recvs(mca_btl_base_endpoint_t *ep, const int qp,
const int num_post)
{
int i;
int i, rc;
struct ibv_recv_wr *bad_wr, *wr_list = NULL, *wr = NULL;
mca_btl_openib_module_t *openib_btl = ep->endpoint_btl;
@@ -260,10 +274,11 @@ static inline int post_recvs(mca_btl_base_endpoint_t *ep, const int qp,
wr->next = NULL;
if(!ibv_post_recv(ep->qps[qp].qp->lcl_qp, wr_list, &bad_wr))
rc = ibv_post_recv(ep->qps[qp].qp->lcl_qp, wr_list, &bad_wr);
if (0 == rc)
return OMPI_SUCCESS;
BTL_ERROR(("error posting receive on qp %d\n", qp));
BTL_ERROR(("error %d posting receive on qp %d\n", rc, qp));
return OMPI_ERROR;
}
@@ -361,9 +376,10 @@ static inline int check_endpoint_state(mca_btl_openib_endpoint_t *ep,
switch(ep->endpoint_state) {
case MCA_BTL_IB_CLOSED:
rc = ompi_btl_openib_connect.bcf_start_connect(ep);
if(rc == OMPI_SUCCESS)
rc = ep->endpoint_local_cpc->cbm_start_connect(ep->endpoint_local_cpc, ep);
if (OMPI_SUCCESS == rc) {
rc = ORTE_ERR_RESOURCE_BUSY;
}
/*
* As long as we expect a message from the peer (in order
* to setup the connection) let the event engine poll the

485
ompi/mca/btl/openib/btl_openib_fd.c Normal file
View file

@@ -0,0 +1,485 @@
/*
* Copyright (c) 2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <pthread.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include "opal/class/opal_list.h"
#include "opal/event/event.h"
#include "opal/util/output.h"
#include "ompi/constants.h"
#include "btl_openib_fd.h"
/*
* Data for each registered item
*/
typedef struct {
opal_list_item_t super;
bool ri_event_used;
opal_event_t ri_event;
int ri_fd;
int ri_flags;
union {
ompi_btl_openib_fd_callback_fn_t *fd;
ompi_btl_openib_schedule_callback_fn_t *schedule;
} ri_callback;
void *ri_context;
} registered_item_t;
static OBJ_CLASS_INSTANCE(registered_item_t, opal_list_item_t, NULL, NULL);
/*
* Command types
*/
typedef enum {
CMD_TIME_TO_QUIT,
CMD_ADD_FD,
CMD_REMOVE_FD,
CMD_MAX
} cmd_type_t;
/*
* Commands. Fields ordered to avoid memory holes (and valgrind warnings).
*/
typedef struct {
ompi_btl_openib_fd_callback_fn_t *pc_callback;
void *pc_context;
int pc_fd;
int pc_flags;
cmd_type_t pc_cmd;
char end;
} cmd_t;
static bool initialized = false;
static int cmd_size = 0;
static fd_set read_fds, write_fds;
static int max_fd;
static opal_list_t registered_items;
/* These items are only used in the threaded version */
static pthread_t thread;
static int pipe_fd[2] = { -1, -1 };
static void libevent_fd_callback(int fd, short event, void *context)
{
registered_item_t *ri = (registered_item_t*) context;
ri->ri_callback.fd(fd, event, ri->ri_context);
}
static void libevent_event_callback(int fd, short event, void *context)
{
registered_item_t *ri = (registered_item_t*) context;
ri->ri_callback.schedule(ri->ri_context);
/* JMS Can I free ri now? It contains the event... */
#if 0
OBJ_RELEASE(ri);
#endif
}
/*
* Add an fd to the listening set
*/
static int local_pipe_cmd_add_fd(bool use_libevent, cmd_t *cmd)
{
registered_item_t *ri = OBJ_NEW(registered_item_t);
if (NULL == ri) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
ri->ri_event_used = false;
ri->ri_fd = cmd->pc_fd;
ri->ri_flags = cmd->pc_flags;
ri->ri_callback.fd = cmd->pc_callback;
ri->ri_context = cmd->pc_context;
if (use_libevent) {
/* Make an event for this fd */
ri->ri_event_used = true;
memset(&ri->ri_event, 0, sizeof(ri->ri_event));
opal_event_set(&ri->ri_event, ri->ri_fd,
ri->ri_flags | OPAL_EV_PERSIST, libevent_fd_callback,
ri);
opal_event_add(&ri->ri_event, 0);
} else {
/* Add the fd to the relevant fd local sets and update max_fd */
if (OPAL_EV_READ & ri->ri_flags) {
FD_SET(ri->ri_fd, &read_fds);
}
if (OPAL_EV_WRITE & cmd->pc_flags) {
FD_SET(ri->ri_fd, &write_fds);
}
max_fd = (max_fd > ri->ri_fd) ? max_fd : ri->ri_fd + 1;
}
opal_list_append(&registered_items, &ri->super);
return OMPI_SUCCESS;
}
/*
* Remove an fd from the listening set
*/
static int local_pipe_cmd_remove_fd(cmd_t *cmd)
{
int i;
opal_list_item_t *item;
registered_item_t *ri;
/* Go through the list of registered fd's and find the fd to
remove */
for (item = opal_list_get_first(&registered_items);
item != opal_list_get_end(&registered_items);
item = opal_list_get_next(item)) {
ri = (registered_item_t*) item;
if (cmd->pc_fd == ri->ri_fd) {
/* Found it. The item knows if it was used as a libevent
event or an entry in the local fd sets. */
if (ri->ri_event_used) {
/* Remove this event from libevent */
opal_event_del(&ri->ri_event);
} else {
/* Remove this item from the fd_sets and recalculate
max_fd */
FD_CLR(cmd->pc_fd, &read_fds);
FD_CLR(cmd->pc_fd, &write_fds);
for (max_fd = i = pipe_fd[0]; i < FD_SETSIZE; ++i) {
if (FD_ISSET(i, &read_fds) || FD_ISSET(i, &write_fds)) {
max_fd = i + 1;
}
}
}
/* Let the caller know that we have stopped monitoring
this fd (if they care) */
if (NULL != cmd->pc_callback) {
cmd->pc_callback(cmd->pc_fd, 0, cmd->pc_context);
}
/* Remove this item from the list of registered items and
release it */
opal_list_remove_item(&registered_items, item);
OBJ_RELEASE(item);
return OMPI_SUCCESS;
}
}
/* This shouldn't happen */
return OMPI_ERR_NOT_FOUND;
}
/*
* Simple loop over reading from a fd
*/
static int read_fd(int fd, int len, void *buffer)
{
int rc;
char *b = buffer;
while (len > 0) {
rc = read(fd, b, len);
if (rc < 0 && EAGAIN == errno) {
continue;
} else if (rc > 0) {
len -= rc;
b += rc;
} else {
return OMPI_ERROR;
}
}
return OMPI_SUCCESS;
}
/*
* Simple loop over writing to an fd
*/
static int write_fd(int fd, int len, void *buffer)
{
int rc;
char *b = buffer;
while (len > 0) {
rc = write(fd, b, len);
if (rc < 0 && EAGAIN == errno) {
continue;
} else if (rc > 0) {
len -= rc;
b += rc;
} else {
return OMPI_ERROR;
}
}
return OMPI_SUCCESS;
}
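/* (Both helpers above tolerate short transfers and EAGAIN so that a
   cmd_t always crosses the pipe intact, even when it is read or
   written in pieces.) */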
/*
* Act on pipe commands
*/
static bool local_pipe_cmd(void)
{
bool ret = false;
cmd_t cmd;
read_fd(pipe_fd[0], cmd_size, &cmd);
switch (cmd.pc_cmd) {
case CMD_ADD_FD:
if (OMPI_SUCCESS != local_pipe_cmd_add_fd(false, &cmd)) {
ret = true;
}
break;
case CMD_REMOVE_FD:
if (OMPI_SUCCESS != local_pipe_cmd_remove_fd(&cmd)) {
ret = true;
}
break;
case CMD_TIME_TO_QUIT:
opal_output(-1, "fd listener thread: time to quit");
ret = true;
break;
default:
opal_output(-1, "fd listener thread: unknown pipe command!");
break;
}
return ret;
}
/*
* Main thread logic
*/
static void *thread_main(void *context)
{
int rc, flags;
fd_set read_fds_copy, write_fds_copy;
opal_list_item_t *item;
registered_item_t *ri;
/* Make an fd set that we can select() on */
FD_ZERO(&write_fds);
FD_ZERO(&read_fds);
FD_SET(pipe_fd[0], &read_fds);
max_fd = pipe_fd[0] + 1;
opal_output(-1, "fd listener thread running");
/* Main loop waiting for commands over the fd's */
while (1) {
memcpy(&read_fds_copy, &read_fds, sizeof(read_fds));
memcpy(&write_fds_copy, &write_fds, sizeof(write_fds));
opal_output(-1, "fd listener thread blocking on select...");
rc = select(max_fd, &read_fds_copy, &write_fds_copy, NULL, NULL);
if (rc < 0 && EAGAIN == errno) {
continue;
}
opal_output(-1, "fd listener thread woke up!");
if (rc > 0) {
if (FD_ISSET(pipe_fd[0], &read_fds_copy)) {
opal_output(-1, "fd listener thread: pipe command");
if (local_pipe_cmd()) {
opal_output(-1, "fd listener thread: exiting");
break;
}
}
/* Go through all the registered events and see who had
activity */
if (!opal_list_is_empty(&registered_items)) {
for (item = opal_list_get_first(&registered_items);
item != opal_list_get_end(&registered_items);
item = opal_list_get_next(item)) {
ri = (registered_item_t*) item;
flags = 0;
/* See if this fd was ready for reading or writing
(fd's will only be in the read_fds or write_fds
set depending on what they registered for) */
if (FD_ISSET(ri->ri_fd, &read_fds_copy)) {
flags |= OPAL_EV_READ;
}
if (FD_ISSET(ri->ri_fd, &write_fds_copy)) {
flags |= OPAL_EV_WRITE;
}
/* If either was ready, invoke the callback */
if (0 != flags) {
opal_output(-1, "fd listener thread: invoking callback for registered fd %d", ri->ri_fd);
ri->ri_callback.fd(ri->ri_fd, flags,
ri->ri_context);
}
}
}
}
}
/* All done */
return NULL;
}
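/* Design note: in the threaded case every add/remove request arrives
   as a serialized cmd_t over pipe_fd, so once this thread is running
   it is the only context that touches read_fds, write_fds, and
   registered_items; no locking is needed, and select() is re-armed
   with the updated sets on the next loop iteration. */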
/*
* Initialize
*/
int ompi_btl_openib_fd_init(void)
{
if (!initialized) {
cmd_t bogus;
OBJ_CONSTRUCT(&registered_items, opal_list_t);
if (OMPI_HAVE_THREAD_SUPPORT) {
/* Create a pipe to communicate with the thread */
if (0 != pipe(pipe_fd)) {
return OMPI_ERR_IN_ERRNO;
}
if (0 != pthread_create(&thread, NULL, thread_main, NULL)) {
return OMPI_ERR_IN_ERRNO;
}
}
/* Calculate the real size of the cmd struct */
cmd_size = (int) (&(bogus.end) - ((char*) &bogus));
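/* (Same trick as the modex message's "end" field: cmd_size counts only
   the meaningful bytes of cmd_t, so struct padding never crosses the
   pipe.) */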
initialized = true;
}
return OMPI_SUCCESS;
}
/*
* Start monitoring an fd
*/
int ompi_btl_openib_fd_monitor(int fd, int flags,
ompi_btl_openib_fd_callback_fn_t *callback,
void *context)
{
cmd_t cmd;
/* Sanity check */
if (fd < 0 || 0 == flags || NULL == callback) {
return OMPI_ERR_BAD_PARAM;
}
cmd.pc_cmd = CMD_ADD_FD;
cmd.pc_fd = fd;
cmd.pc_flags = flags;
cmd.pc_callback = callback;
cmd.pc_context = context;
if (OMPI_HAVE_THREAD_SUPPORT) {
/* For the threaded version, write a command down the pipe */
write_fd(pipe_fd[1], cmd_size, &cmd);
} else {
/* Otherwise, add it directly */
local_pipe_cmd_add_fd(true, &cmd);
}
return OMPI_SUCCESS;
}
/*
* Stop monitoring an fd
*/
int ompi_btl_openib_fd_unmonitor(int fd,
ompi_btl_openib_fd_callback_fn_t *callback,
void *context)
{
cmd_t cmd;
/* Sanity check */
if (fd < 0) {
return OMPI_ERR_BAD_PARAM;
}
cmd.pc_cmd = CMD_REMOVE_FD;
cmd.pc_fd = fd;
cmd.pc_flags = 0;
cmd.pc_callback = callback;
cmd.pc_context = context;
if (OMPI_HAVE_THREAD_SUPPORT) {
/* For the threaded version, write a command down the pipe */
write_fd(pipe_fd[1], cmd_size, &cmd);
} else {
/* Otherwise, remove it directly */
local_pipe_cmd_remove_fd(&cmd);
}
return OMPI_SUCCESS;
}
/*
* Run a function in the main thread
*/
int ompi_btl_openib_fd_schedule(ompi_btl_openib_schedule_callback_fn_t *callback,
void *context)
{
if (OMPI_HAVE_THREAD_SUPPORT) {
/* For the threaded version, schedule an event for "now" */
registered_item_t *ri;
struct timeval now;
ri = OBJ_NEW(registered_item_t);
if (NULL == ri) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Create an event that will run in the main thread */
ri->ri_fd = ri->ri_flags = -1;
ri->ri_callback.schedule = callback;
ri->ri_context = context;
ri->ri_event_used = true;
opal_evtimer_set(&ri->ri_event, libevent_event_callback, ri);
now.tv_sec = 0;
now.tv_usec = 0;
opal_evtimer_add(&ri->ri_event, &now);
} else {
/* For the non-threaded version, just call the function */
callback(context);
}
return OMPI_SUCCESS;
}
/*
* Finalize
*/
int ompi_btl_openib_fd_finalize(void)
{
if (initialized) {
if (OMPI_HAVE_THREAD_SUPPORT) {
/* For the threaded version, send a command down the pipe */
cmd_t cmd;
memset(&cmd, 0, cmd_size);
cmd.pc_cmd = CMD_TIME_TO_QUIT;
write_fd(pipe_fd[1], cmd_size, &cmd);
pthread_join(thread, NULL);
close(pipe_fd[0]);
close(pipe_fd[1]);
}
}
initialized = false;
return OMPI_SUCCESS;
}

61
ompi/mca/btl/openib/btl_openib_fd.h Normal file
View file

@@ -0,0 +1,61 @@
/*
* Copyright (c) 2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_BTL_OPENIB_FD_H_
#define OMPI_BTL_OPENIB_FD_H_
#include "ompi_config.h"
BEGIN_C_DECLS
/**
* Typedef for fd callback function
*/
typedef void *(ompi_btl_openib_fd_callback_fn_t)(int fd, int flags,
void *context);
/**
* Typedef for generic callback function
*/
typedef void *(ompi_btl_openib_schedule_callback_fn_t)(void *context);
/**
* Initialize fd monitoring
*/
int ompi_btl_openib_fd_init(void);
/**
* Start monitoring an fd
*/
int ompi_btl_openib_fd_monitor(int fd, int flags,
ompi_btl_openib_fd_callback_fn_t *callback,
void *context);
/**
* Stop monitoring an fd
*/
int ompi_btl_openib_fd_unmonitor(int fd,
ompi_btl_openib_fd_callback_fn_t *callback,
void *context);
/**
* Run a function in the main thread
*/
int ompi_btl_openib_fd_schedule(ompi_btl_openib_schedule_callback_fn_t callback,
void *context);
/**
* Finalize fd monitoring
*/
int ompi_btl_openib_fd_finalize(void);
END_C_DECLS
#endif
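A hedged usage sketch (not code from this commit) of how a CPC such as the new rdmacm CPC might drive this API; "cm_event_handler", "event_channel", and "my_cpc_module" are hypothetical stand-ins:

#include <rdma/rdma_cma.h>
#include "ompi/constants.h"
#include "opal/event/event.h"
#include "btl_openib_fd.h"

/* Invoked in the fd listener thread (or via libevent in
   single-threaded builds) whenever the channel's fd is readable. */
static void *cm_event_handler(int fd, int flags, void *context)
{
    /* e.g., rdma_get_cm_event() / rdma_ack_cm_event() would go here */
    return NULL;
}

static int start_listening(struct rdma_event_channel *event_channel,
                           void *my_cpc_module)
{
    int rc = ompi_btl_openib_fd_init();
    if (OMPI_SUCCESS != rc) {
        return rc;
    }
    return ompi_btl_openib_fd_monitor(event_channel->fd, OPAL_EV_READ,
                                      cm_event_handler, my_cpc_module);
}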

View file

@@ -69,7 +69,7 @@ static inline int reg_string(const char* param_name, const char* param_desc,
default_value, &value);
if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(value)) {
opal_output(0, "Bad parameter value for parameter \"%s\"\n",
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
@@ -96,7 +96,7 @@ static inline int reg_int(const char* param_name, const char* param_desc,
if ((0 != (flags & REGINT_GE_ZERO) && value < 0) ||
(0 != (flags & REGINT_GE_ONE) && value < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == value)) {
opal_output(0, "Bad parameter value for parameter \"%s\"\n",
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
@@ -412,6 +412,15 @@ int btl_openib_register_mca_params(void)
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.apm_ports = (uint32_t) ival;
CHECK(reg_int("enable_apm_over_lmc", "Maximum number of alterative paths for each HCA port "
"(must be >= -1, where 0 = disable apm, -1 = all availible alternative paths )",
0, &ival, REGINT_NEG_ONE_OK|REGINT_GE_ZERO));
mca_btl_openib_component.apm_lmc = (uint32_t) ival;
CHECK(reg_int("enable_apm_over_ports", "Enable alterative path migration over different ports of the same HCA"
"(must be >= 0, where 0 = disable apm over ports , 1 = enable apm over ports of the same hca )",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.apm_ports = (uint32_t) ival;
CHECK(reg_int("use_async_event_thread",
"If nonzero, use the thread that will handle InfiniBand asyncihronous events ",
1, &ival, 0));
@ -610,7 +619,7 @@ static int mca_btl_openib_mca_setup_qps(void)
rd_win = atoi_param(P(4), (rd_num - rd_low) * 2);
rd_rsv = atoi_param(P(5), (rd_num * 2) / rd_win);
BTL_VERBOSE(("pp: rd_num is %d rd_low is %d rd_win %d rd_rsv %d\n",
BTL_VERBOSE(("pp: rd_num is %d rd_low is %d rd_win %d rd_rsv %d",
rd_num, rd_low, rd_win, rd_rsv));
/* Calculate the smallest freelist size that can be allowed */
@ -637,7 +646,7 @@ static int mca_btl_openib_mca_setup_qps(void)
/* by default set rd_low to be 3/4 of rd_num */
rd_low = atoi_param(P(3), rd_num - (rd_num / 4));
sd_max = atoi_param(P(4), rd_low / 4);
BTL_VERBOSE(("srq: rd_num is %d rd_low is %d sd_max is %d\n",
BTL_VERBOSE(("srq: rd_num is %d rd_low is %d sd_max is %d",
rd_num, rd_low, sd_max));
/* Calculate the smallest freelist size that can be allowed */
@ -682,7 +691,7 @@ static int mca_btl_openib_mca_setup_qps(void)
orte_process_info.nodename, max_qp_size,
max_size_needed);
opal_output(0, "The biggest QP size is bigger than maximum send size. "
"This is not optimal configuration as memory will be waisted.\n");
"This is not optimal configuration as memory will be wasted.");
}
if (mca_btl_openib_component.ib_free_list_max > 0 &&
@ -698,7 +707,7 @@ static int mca_btl_openib_mca_setup_qps(void)
mca_btl_openib_component.credits_qp = smallest_pp_qp;
/* Register any MCA params for the connect pseudo-components */
if (OMPI_SUCCESS != ompi_btl_openib_connect_base_open())
if (OMPI_SUCCESS != ompi_btl_openib_connect_base_register())
goto error;
ret = OMPI_SUCCESS;

View file

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* $COPYRIGHT$
*
@ -27,6 +27,8 @@
#include "btl_openib.h"
#include "btl_openib_proc.h"
#include "connect/base.h"
#include "connect/connect.h"
static void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* proc);
static void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* proc);
@ -38,6 +40,7 @@ OBJ_CLASS_INSTANCE(mca_btl_openib_proc_t,
void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* proc)
{
proc->proc_ompi = 0;
proc->proc_ports = NULL;
proc->proc_port_count = 0;
proc->proc_endpoints = 0;
proc->proc_endpoint_count = 0;
@ -63,6 +66,17 @@ void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* proc)
if(NULL != proc->proc_endpoints) {
free(proc->proc_endpoints);
}
if (NULL != proc->proc_ports) {
int i, j;
for (i = 0; i < proc->proc_port_count; ++i) {
for (j = 0; j < proc->proc_ports[i].pm_cpc_data_count; ++j) {
if (NULL != proc->proc_ports[i].pm_cpc_data[j].cbm_modex_message) {
free(proc->proc_ports[i].pm_cpc_data[j].cbm_modex_message);
}
}
}
free(proc->proc_ports);
}
}
@ -90,12 +104,20 @@ static mca_btl_openib_proc_t* mca_btl_openib_proc_lookup_ompi(ompi_proc_t* ompi_
return NULL;
}
static inline void unpack8(char **src, uint8_t *value)
{
/* Copy one character */
*value = (uint8_t) **src;
/* Move the src ahead one */
++*src;
}
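/* Editor's note (not part of this commit): the packing side in
   btl_openib_component.c uses the mirror-image helper; a sketch under
   that assumption, for reference: */
static inline void pack8(char **dest, uint8_t value)
{
    /* Copy one character */
    **dest = (char) value;
    /* Move the dest ahead one */
    ++*dest;
}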
/*
* Create a IB process structure. There is a one-to-one correspondence
* between a ompi_proc_t and a mca_btl_openib_proc_t instance. We cache
* additional data (specifically the list of mca_btl_openib_endpoint_t instances,
* and published addresses) associated w/ a given destination on this
* datastructure.
* between a ompi_proc_t and a mca_btl_openib_proc_t instance. We
* cache additional data (specifically the list of
* mca_btl_openib_endpoint_t instances, and published addresses)
* associated w/ a given destination on this datastructure.
*/
mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
@ -103,10 +125,11 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
mca_btl_openib_proc_t* module_proc = NULL;
size_t msg_size;
uint32_t size;
size_t i;
int rc;
int rc, i, j;
void *message;
char *offset;
int modex_message_size;
mca_btl_openib_modex_message_t dummy;
/* Check if we have already created a IB proc
* structure for this ompi process */
@ -145,56 +168,118 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
/* Message was packed in btl_openib_component.c; the format is
listed in a comment in that file */
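/* Editor's note: the wire format unpacked by the loop below, as
   reconstructed from this function (the authoritative description is
   the comment in btl_openib_component.c):

     uint8_t  number of ports (openib BTL modules)
     per port:
       modex message struct (truncated at its "end" member; see
                             modex_message_size, below)
       uint8_t  number of CPCs for this port
       per CPC:
         uint8_t  CPC index (into the CPC framework's component array)
         uint8_t  CPC priority
         uint8_t  CPC modex blob length
         blob bytes, if length > 0 */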
/* Unpack the number of ports in the message */
modex_message_size = ((char *) &(dummy.end)) - ((char*) &dummy);
/* Unpack the number of modules in the message */
offset = message;
memcpy(&size, offset, sizeof(uint32_t));
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
size = ntohl(size);
#endif
module_proc->proc_port_count = size;
module_proc->proc_ports = (mca_btl_openib_port_info_t *)malloc(sizeof(mca_btl_openib_port_info_t) * module_proc->proc_port_count);
offset += sizeof(uint32_t);
unpack8(&offset, &(module_proc->proc_port_count));
opal_output(-1, "unpack: %d btls", module_proc->proc_port_count);
if (module_proc->proc_port_count > 0) {
module_proc->proc_ports = (mca_btl_openib_proc_modex_t *)
malloc(sizeof(mca_btl_openib_proc_modex_t) *
module_proc->proc_port_count);
} else {
module_proc->proc_ports = NULL;
}
/* Loop over unpacking all the ports */
for (i = 0; i < module_proc->proc_port_count; i++) {
/* Unpack the port */
memcpy(&module_proc->proc_ports[i], offset,
sizeof(mca_btl_openib_port_info_t));
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
MCA_BTL_OPENIB_PORT_INFO_NTOH(module_proc->proc_ports[i]);
#endif
offset += sizeof(mca_btl_openib_port_info_t);
/* Unpack the string length */
memcpy(&size, offset, sizeof(size));
/* Unpack the common modex message struct */
size = modex_message_size;
memcpy(&(module_proc->proc_ports[i].pm_port_info), offset, size);
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
size = ntohl(size);
MCA_BTL_OPENIB_MODEX_MSG_NTOH(module_proc->proc_ports[i].pm_port_info);
#endif
offset += sizeof(size);
/* Unpack the string */
module_proc->proc_ports[i].cpclist = malloc(size + 1);
if (NULL == module_proc->proc_ports[i].cpclist) {
/* JMS some error */
}
memcpy(module_proc->proc_ports[i].cpclist, offset, size);
module_proc->proc_ports[i].cpclist[size] = '\0';
offset += size;
opal_output(-1, "unpacked btl %d: modex message, offset now %d",
i, (int)(offset-((char*)message)));
/* Unpack the number of CPCs that follow */
unpack8(&offset, &(module_proc->proc_ports[i].pm_cpc_data_count));
opal_output(-1, "unpacked btl %d: number of cpcs to follow %d (offset now %d)",
i, module_proc->proc_ports[i].pm_cpc_data_count, (int)(offset-((char*)message)));
module_proc->proc_ports[i].pm_cpc_data =
calloc(module_proc->proc_ports[i].pm_cpc_data_count,
sizeof(ompi_btl_openib_connect_base_module_data_t));
if (NULL == module_proc->proc_ports[i].pm_cpc_data) {
return NULL;
}
/* Unpack the CPCs */
for (j = 0; j < module_proc->proc_ports[i].pm_cpc_data_count; ++j) {
uint8_t u8;
ompi_btl_openib_connect_base_module_data_t *cpcd;
cpcd = module_proc->proc_ports[i].pm_cpc_data + j;
unpack8(&offset, &u8);
opal_output(-1, "unpacked btl %d: cpc %d: index %d (offset now %d)",
i, j, u8, (int)(offset-(char*)message));
cpcd->cbm_component =
ompi_btl_openib_connect_base_get_cpc_byindex(u8);
opal_output(-1, "unpacked btl %d: cpc %d: component %s",
i, j, cpcd->cbm_component->cbc_name);
unpack8(&offset, &cpcd->cbm_priority);
unpack8(&offset, &cpcd->cbm_modex_message_len);
opal_output(-1, "unpacked btl %d: cpc %d: priority %d, msg len %d (offset now %d)",
i, j, cpcd->cbm_priority, cpcd->cbm_modex_message_len, (int)(offset-(char*)message));
if (cpcd->cbm_modex_message_len > 0) {
cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len);
if (NULL == cpcd->cbm_modex_message) {
BTL_ERROR(("Failed to malloc"));
return NULL;
}
memcpy(cpcd->cbm_modex_message, offset,
cpcd->cbm_modex_message_len);
offset += cpcd->cbm_modex_message_len;
opal_output(-1, "unpacked btl %d: cpc %d: blob unpacked %d %x (offset now %d)",
i, j,
((uint32_t*)cpcd->cbm_modex_message)[0],
((uint32_t*)cpcd->cbm_modex_message)[1],
(int)(offset-((char*)message)));
}
}
}
if (0 == module_proc->proc_port_count) {
module_proc->proc_endpoints = NULL;
} else {
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
malloc(module_proc->proc_port_count * sizeof(mca_btl_base_endpoint_t*));
malloc(module_proc->proc_port_count *
sizeof(mca_btl_base_endpoint_t*));
}
if (NULL == module_proc->proc_endpoints) {
OBJ_RELEASE(module_proc);
return NULL;
}
opal_output(-1, "unpacking done!");
return module_proc;
}
int mca_btl_openib_proc_remove(ompi_proc_t *proc,
mca_btl_base_endpoint_t *endpoint)
{
size_t i;
mca_btl_openib_proc_t* ib_proc = NULL;
/* Remove endpoint from the openib BTL version of the proc as
well */
ib_proc = mca_btl_openib_proc_lookup_ompi(proc);
if (NULL != ib_proc) {
for (i = 0; i < ib_proc->proc_endpoint_count; ++i) {
if (ib_proc->proc_endpoints[i] == endpoint) {
ib_proc->proc_endpoints[i] = NULL;
if (i == ib_proc->proc_endpoint_count - 1) {
--ib_proc->proc_endpoint_count;
}
return OMPI_SUCCESS;
}
}
}
return OMPI_ERR_NOT_FOUND;
}
/*
* Note that this routine must be called with the lock on the process

View file

@ -10,6 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -25,9 +26,31 @@
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
/* Must forward reference this to avoid include file loop */
struct ompi_btl_openib_connect_base_module_data_t;
/**
* Data received from the modex. For each openib BTL module/port in
* the peer, we'll receive two things:
*
* 1. Data about the peer's port
* 2. An array of CPCs that the peer has available on that port, each
* of which has its own meta data
*
* Hence, these two items need to be bundled together.
*/
typedef struct mca_btl_openib_proc_modex_t {
/** Information about the peer's port */
mca_btl_openib_modex_message_t pm_port_info;
/** Array of the peer's CPCs available on this port */
ompi_btl_openib_connect_base_module_data_t *pm_cpc_data;
/** Length of the pm_cpc_data array */
uint8_t pm_cpc_data_count;
} mca_btl_openib_proc_modex_t;
/**
* Represents the state of a remote process and the set of addresses
@ -36,27 +59,29 @@ extern "C" {
* BTL instance that attempts to open a connection to the process.
*/
struct mca_btl_openib_proc_t {
/** allow proc to be placed on a list */
opal_list_item_t super;
/**< allow proc to be placed on a list */
/** pointer to corresponding ompi_proc_t */
ompi_proc_t *proc_ompi;
/**< pointer to corresponding ompi_proc_t */
/** globally unique identifier for the process */
orte_process_name_t proc_guid;
/**< globally unique identifier for the process */
mca_btl_openib_port_info_t* proc_ports;
size_t proc_port_count;
/**< number of ports published by endpoint */
/** modex messages from this proc; one for each port in the peer */
mca_btl_openib_proc_modex_t *proc_ports;
/** length of proc_ports array */
uint8_t proc_port_count;
/** array of endpoints that have been created to access this proc */
struct mca_btl_base_endpoint_t **proc_endpoints;
/**< array of endpoints that have been created to access this proc */
/** number of endpoints (length of proc_endpoints array) */
size_t proc_endpoint_count;
/**< number of endpoints */
/** lock to protect against concurrent access to proc state */
opal_mutex_t proc_lock;
/**< lock to protect against concurrent access to proc state */
};
typedef struct mca_btl_openib_proc_t mca_btl_openib_proc_t;
@ -64,8 +89,9 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_proc_t);
mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc);
int mca_btl_openib_proc_insert(mca_btl_openib_proc_t*, mca_btl_base_endpoint_t*);
int mca_btl_openib_proc_remove(ompi_proc_t* proc,
mca_btl_base_endpoint_t* module_endpoint);
END_C_DECLS
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

View file

@ -72,6 +72,10 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_hca_t *hca)
/* This func. closes XRC domain */
int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_hca_t *hca)
{
if (NULL == hca->xrc_domain) {
/* No XRC domain, just exit */
return OMPI_SUCCESS;
}
if (ibv_close_xrc_domain(hca->xrc_domain)) {
BTL_ERROR(("Failed to close XRC domain, errno says %s\n",
hca->xrc_fd, strerror(errno)));

View file

@ -22,6 +22,8 @@
# ------------------------------------------
AC_DEFUN([MCA_btl_openib_POST_CONFIG], [
AM_CONDITIONAL([MCA_btl_openib_have_xrc], [test $1 -eq 1 -a "x$btl_openib_have_xrc" = "x1" -a "x$ompi_want_connectx_xrc" = "x1"])
AM_CONDITIONAL([MCA_btl_openib_have_rdmacm], [test $1 -eq 1 -a "x$btl_openib_have_rdmacm" = "x1"])
AM_CONDITIONAL([MCA_btl_openib_have_ibcm], [test $1 -eq 1 -a "x$btl_openib_have_ibcm" = "x1"])
])
@ -29,6 +31,9 @@ AC_DEFUN([MCA_btl_openib_POST_CONFIG], [
# [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_btl_openib_CONFIG],[
OMPI_VAR_SCOPE_PUSH([cpcs])
cpcs="oob"
OMPI_CHECK_OPENIB([btl_openib],
[btl_openib_happy="yes"],
[btl_openib_happy="no"])
@ -49,9 +54,24 @@ AC_DEFUN([MCA_btl_openib_CONFIG],[
[$2])
AS_IF([test "$btl_openib_happy" = "yes"],
[if test "x$btl_openib_have_xrc" = "x1" -a "x$ompi_want_connectx_xrc" = "x1"; then
cpcs="$cpcs xrc"
fi
if test "x$btl_openib_have_rdma_cm" = "x1"; then
cpcs="$cpcs rdma_cm"
fi
if test "x$btl_openib_have_ib_cm" = "x1"; then
cpcs="$cpcs ibcm"
fi
AC_MSG_CHECKING([which openib btl cpcs will be built])
AC_MSG_RESULT([$cpcs])])
# substitute in the things needed to build openib
AC_SUBST([btl_openib_CFLAGS])
AC_SUBST([btl_openib_CPPFLAGS])
AC_SUBST([btl_openib_LDFLAGS])
AC_SUBST([btl_openib_LIBS])
OMPI_VAR_SCOPE_POP
])dnl

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -15,21 +15,52 @@
BEGIN_C_DECLS
/**
* Global variable with the selected function pointers in it
*/
extern ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect;
/*
* Register function
*/
int ompi_btl_openib_connect_base_open(void);
int ompi_btl_openib_connect_base_register(void);
/*
* Component-wide CPC init
*/
int ompi_btl_openib_connect_base_init(void);
/*
* Query CPCs to see if they want to run on a specific module
*/
int ompi_btl_openib_connect_base_select_for_local_port
(mca_btl_openib_module_t *btl);
/*
* Forward reference to avoid an include file loop
*/
struct mca_btl_openib_proc_modex_t;
/*
* Select function
*/
int ompi_btl_openib_connect_base_select(char*, char*);
int ompi_btl_openib_connect_base_query(char**, mca_btl_openib_hca_t*);
int ompi_btl_openib_connect_base_find_match
(mca_btl_openib_module_t *btl,
struct mca_btl_openib_proc_modex_t *peer_port,
ompi_btl_openib_connect_base_module_t **local_cpc,
ompi_btl_openib_connect_base_module_data_t **remote_cpc_data);
/*
* Find a CPC's index so that we can send it in the modex
*/
int ompi_btl_openib_connect_base_get_cpc_index
(ompi_btl_openib_connect_base_component_t *cpc);
/*
* Lookup a CPC by its index (received from the modex)
*/
ompi_btl_openib_connect_base_component_t *
ompi_btl_openib_connect_base_get_cpc_byindex(uint8_t index);
/*
* Component-wide CPC finalize
*/
void ompi_btl_openib_connect_base_finalize(void);
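/*
 * Editor's sketch (not part of this commit) of the order in which the
 * openib BTL invokes the functions above, per the descriptions in
 * connect/connect.h (error handling elided):
 *
 *   ompi_btl_openib_connect_base_register();   during component open
 *   ompi_btl_openib_connect_base_init();       during component init
 *   ompi_btl_openib_connect_base_select_for_local_port(btl);
 *                                              once per BTL module (port/LID)
 *   ompi_btl_openib_connect_base_find_match(btl, peer_port,
 *                                           &local_cpc, &remote_cpc_data);
 *                                              when wiring up a remote port
 *   ompi_btl_openib_connect_base_finalize();   during component close
 */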
END_C_DECLS

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies, Inc. All rights reserved.
*
* $COPYRIGHT$
@ -11,221 +11,394 @@
#include "ompi_config.h"
#include "btl_openib.h"
#include "btl_openib_proc.h"
#include "connect/base.h"
#include "connect/btl_openib_connect_oob.h"
#include "connect/btl_openib_connect_empty.h"
#if HAVE_XRC
#include "connect/btl_openib_connect_xoob.h"
#include "connect/btl_openib_connect_rdma_cm.h"
#endif
#if OMPI_HAVE_RDMACM
#include "connect/btl_openib_connect_rdmacm.h"
#endif
#if OMPI_HAVE_IBCM
#include "connect/btl_openib_connect_ibcm.h"
#endif
#include "opal/util/argv.h"
#include "opal/util/show_help.h"
/*
* Global variable with the selected function pointers in it
*/
ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect = {
"",
/* Compiler fills in the rest with NULL */
};
/*
* Array of all possible connection functions
*/
static ompi_btl_openib_connect_base_funcs_t *all[] = {
static ompi_btl_openib_connect_base_component_t *all[] = {
&ompi_btl_openib_connect_oob,
/* Always have an entry here so that the CPC indexes will always be
the same: if XRC is not available, use the "empty" CPC */
#if HAVE_XRC
&ompi_btl_openib_connect_xoob,
#else
&ompi_btl_openib_connect_empty,
#endif
&ompi_btl_openib_connect_rdma_cm,
/* Always have an entry here so that the CPC indexes will always be
the same: if RDMA CM is not available, use the "empty" CPC */
#if OMPI_HAVE_RDMACM
&ompi_btl_openib_connect_rdmacm,
#else
&ompi_btl_openib_connect_empty,
#endif
/* Always have an entry here so that the CPC indexes will always be
the same: if IB CM is not available, use the "empty" CPC */
#if OMPI_HAVE_IBCM
&ompi_btl_openib_connect_ibcm,
#else
&ompi_btl_openib_connect_empty,
#endif
NULL
};
/*
* MCA parameter value
*/
static char *cpc_include = NULL;
static char *cpc_exclude = NULL;
static ompi_btl_openib_connect_base_component_t **available = NULL;
static int num_available = 0;
/*
* Register MCA parameters
*/
int ompi_btl_openib_connect_base_open(void)
int ompi_btl_openib_connect_base_register(void)
{
int i;
char **temp, *list, *string;
int i, j, save;
char **temp = NULL, *string = NULL, *all_cpc_names = NULL;
char *cpc_include = NULL, *cpc_exclude = NULL;
/* Make an MCA parameter to select which connect module to use */
temp = NULL;
for (i = 0; NULL != all[i]; ++i) {
opal_argv_append_nosize(&temp, all[i]->bcf_name);
opal_argv_append_nosize(&temp, all[i]->cbc_name);
}
list = opal_argv_join(temp, ',');
all_cpc_names = opal_argv_join(temp, ',');
opal_argv_free(temp);
asprintf(&string,
"Method used to select OpenFabrics connections (valid values: %s)",
list);
all_cpc_names);
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"cpc_include", string, false, false, NULL, &cpc_include);
"cpc_include", string, false, false,
NULL, &cpc_include);
free(string);
asprintf(&string,
"Method used to exclude OpenFabrics connections (valid values: %s)",
list);
all_cpc_names);
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"cpc_exclude", string, false, false, NULL, &cpc_exclude);
free(list);
"cpc_exclude", string, false, false,
NULL, &cpc_exclude);
free(string);
/* Call the open function on all the connect modules so that they
* may setup any MCA params specific to the connection type
*/
for (i = 0; NULL != all[i]; ++i) {
if (NULL != all[i]->bcf_open) {
all[i]->bcf_open();
/* Parse the cpc_[in|ex]clude parameters to come up with a list of
CPCs that are available */
available = calloc(1, sizeof(all));
/* If we have an "include" list, then find all those CPCs and put
them in available[] */
if (NULL != cpc_include) {
temp = opal_argv_split(cpc_include, ',');
for (save = j = 0; NULL != temp[j]; ++j) {
for (i = 0; NULL != all[i]; ++i) {
if (0 == strcmp(temp[j], all[i]->cbc_name)) {
opal_output(-1, "include: saving %s", all[i]->cbc_name);
available[save++] = all[i];
++num_available;
break;
}
}
if (NULL == all[i]) {
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"cpc name not found", true,
"include", orte_process_info.nodename,
"include", cpc_include, temp[j],
all_cpc_names);
opal_argv_free(temp);
free(all_cpc_names);
return OMPI_ERR_NOT_FOUND;
}
}
opal_argv_free(temp);
}
/* Otherwise, if we have an "exclude" list, take all the CPCs that
are not in that list and put them in available[] */
else if (NULL != cpc_exclude) {
temp = opal_argv_split(cpc_exclude, ',');
/* First: error check -- ensure that all the names are valid */
for (j = 0; NULL != temp[j]; ++j) {
for (i = 0; NULL != all[i]; ++i) {
if (0 == strcmp(temp[j], all[i]->cbc_name)) {
break;
}
}
if (NULL == all[i]) {
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"cpc name not found", true,
"exclude", orte_process_info.nodename,
"exclude", cpc_exclude, temp[j],
all_cpc_names);
opal_argv_free(temp);
free(all_cpc_names);
return OMPI_ERR_NOT_FOUND;
}
}
/* Now do the exclude */
for (save = i = 0; NULL != all[i]; ++i) {
for (j = 0; NULL != temp[j]; ++j) {
if (0 == strcmp(temp[j], all[i]->cbc_name)) {
break;
}
}
if (NULL == temp[j]) {
opal_output(-1, "exclude: saving %s", all[i]->cbc_name);
available[save++] = all[i];
++num_available;
}
}
opal_argv_free(temp);
}
/* If there's no include/exclude list, copy all[] into available[] */
else {
opal_output(-1, "no include or exclude: saving all");
memcpy(available, all, sizeof(all));
num_available = (sizeof(all) /
sizeof(ompi_btl_openib_connect_base_component_t *)) - 1;
}
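/* Editor's note: the parameters registered above are
   btl_openib_cpc_include and btl_openib_cpc_exclude; hypothetical
   usage:

     mpirun --mca btl_openib_cpc_include oob ...
     mpirun --mca btl_openib_cpc_exclude ibcm,rdmacm ...

   As coded above, an include list takes precedence; the exclude list
   is only consulted when no include list was given. */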
/* Call the register function on all the CPCs so that they may
setup any MCA params specific to the connection type */
for (i = 0; NULL != available[i]; ++i) {
if (NULL != available[i]->cbc_register) {
available[i]->cbc_register();
}
}
return OMPI_SUCCESS;
}
/*
* Called once during openib BTL component initialization to allow CPC
* components to initialize.
*/
int ompi_btl_openib_connect_base_init(void)
{
int i, rc;
/* Call each available CPC component's open function, if it has
one. If the CPC component open function returns OMPI_SUCCESS,
keep it. If it returns ERR_NOT_SUPPORTED, remove it from the
available[] array. If it returns something else, return that
error upward. */
for (i = num_available = 0; NULL != available[i]; ++i) {
if (NULL == available[i]->cbc_init) {
available[num_available++] = available[i];
opal_output(-1, "found available cpc (NULL init): %s",
all[i]->cbc_name);
continue;
}
rc = available[i]->cbc_init();
if (OMPI_SUCCESS == rc) {
available[num_available++] = available[i];
opal_output(-1, "found available cpc (SUCCESS init): %s",
all[i]->cbc_name);
continue;
} else if (OMPI_ERR_NOT_SUPPORTED == rc) {
continue;
} else {
return rc;
}
}
available[num_available] = NULL;
return (num_available > 0) ? OMPI_SUCCESS : OMPI_ERR_NOT_AVAILABLE;
}
/*
* Find all the CPCs that are eligible for a single local port (i.e.,
* openib module).
*/
int ompi_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *btl)
{
char *msg = NULL;
int i, rc, cpc_index, len;
ompi_btl_openib_connect_base_module_t **cpcs;
cpcs = calloc(num_available,
sizeof(ompi_btl_openib_connect_base_module_t *));
if (NULL == cpcs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Go through all available CPCs and query them to see if they
want to run on this module. If they do, save them to a running
array. */
for (len = 1, i = 0; NULL != available[i]; ++i) {
len += strlen(available[i]->cbc_name) + 2;
}
msg = malloc(len);
if (NULL == msg) {
free(cpcs);
return OMPI_ERR_OUT_OF_RESOURCE;
}
msg[0] = '\0';
for (cpc_index = i = 0; NULL != available[i]; ++i) {
if (i > 0) {
strcat(msg, ", ");
}
strcat(msg, available[i]->cbc_name);
rc = available[i]->cbc_query(btl, &cpcs[cpc_index]);
if (OMPI_ERR_NOT_SUPPORTED == rc) {
continue;
} else if (OMPI_SUCCESS != rc) {
free(cpcs);
free(msg);
return rc;
}
opal_output(-1, "match cpc for local port: %s",
available[i]->cbc_name);
/* This CPC has indicated that it wants to run on this openib
BTL module. Woo hoo! */
++cpc_index;
}
/* If we got an empty array, then no CPCs were eligible. Doh! */
if (0 == cpc_index) {
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"no cpcs for port", true,
orte_process_info.nodename,
ibv_get_device_name(btl->hca->ib_dev),
msg);
free(cpcs);
free(msg);
return OMPI_ERR_NOT_SUPPORTED;
}
free(msg);
/* We got at least one eligible CPC; save the array into the
module's port_info */
btl->cpcs = cpcs;
btl->num_cpcs = cpc_index;
return OMPI_SUCCESS;
}
/*
* The connection method is chosen by comparing the lists passed around
* to all nodes via modex with the list generated locally. Any
* non-negative number is a potentially valid connection method. The
* method below of determining the optimal connection method is to take
* the cross-section of the two lists. The highest single value (and
* the other side being non-negative) is selected as the cpc method.
* This function is invoked when determining whether we have a CPC in
* common with a specific remote port. We already know that the
* subnet ID is the same between a specific local port and the target
* remote port; now we need to know if we can find a CPC in common
* between the two.
*
* If yes, be sure to find the *same* CPC on both sides. We know
* which CPCs are available on each side, and we know the priorities
* that were assigned on both sides. So find a CPC that is common to
* both sides and has the highest overall priority (between both
* sides).
*
* Return the matching CPC, or NULL if not found.
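*
* For example (editor's illustration): if the local port's CPC modules
* are (oob: priority 50, ibcm: 40) and the peer's port advertises
* (ibcm: 80, oob: 50), both CPCs are common, but ibcm carries the
* highest priority seen on either side (80), so ibcm is selected.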
*/
int ompi_btl_openib_connect_base_select(char *remotelist, char *locallist)
int
ompi_btl_openib_connect_base_find_match(mca_btl_openib_module_t *btl,
mca_btl_openib_proc_modex_t *peer_port,
ompi_btl_openib_connect_base_module_t **ret_local_cpc,
ompi_btl_openib_connect_base_module_data_t **ret_remote_cpc_data)
{
int i, j, max = -1;
char **localist_formatted, **remotelist_formatted;
char *name = NULL;
ompi_btl_openib_connect_base_module_t *local_cpc, *local_selected = NULL;
ompi_btl_openib_connect_base_module_data_t *local_cpcd, *remote_cpcd,
*remote_selected = NULL;
BTL_VERBOSE(("remotelist = %s locallist = %s", remotelist, locallist));
/* Iterate over all the CPCs on the local module */
for (i = 0; i < btl->num_cpcs; ++i) {
local_cpc = btl->cpcs[i];
local_cpcd = &(local_cpc->data);
localist_formatted = opal_argv_split(locallist, ',');
remotelist_formatted = opal_argv_split(remotelist, ',');
/* Iterate over all the CPCs on the remote port */
for (j = 0; j < peer_port->pm_cpc_data_count; ++j) {
remote_cpcd = &(peer_port->pm_cpc_data[j]);
for (i = 0; NULL != localist_formatted[i] && NULL != localist_formatted[i+1]; i+=2) {
for (j = 0; NULL != remotelist_formatted[j] && NULL != remotelist_formatted[j+1]; j+=2) {
int local_val, remote_val;
local_val = atoi(localist_formatted[i+1]);
remote_val = atoi(remotelist_formatted[j+1]);
if (0 == strcmp(localist_formatted[i], remotelist_formatted[j]) &&
(-1 != local_val && -1 != remote_val)) {
if (local_val > max) {
max = local_val;
name = localist_formatted[i];
/* Are the components the same? */
if (local_cpcd->cbm_component == remote_cpcd->cbm_component) {
/* If so, update the max priority found so far */
if (max < local_cpcd->cbm_priority) {
max = local_cpcd->cbm_priority;
local_selected = local_cpc;
remote_selected = remote_cpcd;
}
if (remote_val > max) {
max = remote_val;
name = remotelist_formatted[j];
if (max < remote_cpcd->cbm_priority) {
max = remote_cpcd->cbm_priority;
local_selected = local_cpc;
remote_selected = remote_cpcd;
}
}
}
}
if (-1 == max) {
BTL_ERROR(("Failed to find any working connections"));
return OMPI_ERROR;
}
for (i = 0; NULL != all[i]; i++) {
if (0 == strcmp(all[i]->bcf_name, name)) {
int rc;
rc = all[i]->bcf_init();
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("A problem was encountered with %s, ignoring this cpc", all[i]->bcf_name));
return OMPI_ERROR;
}
ompi_btl_openib_connect = *(all[i]);
break;
}
}
BTL_VERBOSE(("%s selected as transport", all[i]->bcf_name));
opal_argv_free(localist_formatted);
opal_argv_free(remotelist_formatted);
return OMPI_SUCCESS;
}
static inline int cpc_specific_query(char ***cpclist, mca_btl_openib_hca_t *hca, int cpc_counter, bool *valid)
{
char *temp;
int rc;
if (NULL == all[cpc_counter]->bcf_query) {
/* All done! */
if (NULL != local_selected) {
*ret_local_cpc = local_selected;
*ret_remote_cpc_data = remote_selected;
opal_output(-1, "find_match: found match!");
return OMPI_SUCCESS;
} else {
opal_output(-1, "find_match: did NOT find match!");
return OMPI_ERR_NOT_FOUND;
}
rc = all[cpc_counter]->bcf_query(hca);
if (rc > 0) {
*valid = 1;
}
asprintf(&temp, "%s,%d", all[cpc_counter]->bcf_name, rc);
opal_argv_append_nosize(cpclist, temp);
return OMPI_SUCCESS;
}
int ompi_btl_openib_connect_base_query(char **cpclist, mca_btl_openib_hca_t *hca)
/*
* Lookup a CPC component's index in the all[] array so that we can
* send it in the modex
*/
int ompi_btl_openib_connect_base_get_cpc_index(ompi_btl_openib_connect_base_component_t *cpc)
{
int i, rc;
bool valid = 0;
char **cpclist_include, **cpclist_exclude, **namepriority_list = NULL;
cpclist_include = opal_argv_split(cpc_include, ',');
cpclist_exclude = opal_argv_split(cpc_exclude, ',');
/* Go through all the CMs to create a list of usable CPCs */
int i;
for (i = 0; NULL != all[i]; ++i) {
if (NULL != cpclist_include) {
int j;
for (j = 0; NULL != cpclist_include[j]; ++j) {
if (0 == strcmp(cpclist_include[j], all[i]->bcf_name)) {
rc = cpc_specific_query(&namepriority_list, hca, i, &valid);
if (OMPI_ERROR == rc) {
return OMPI_ERROR;
}
}
}
} else if (NULL != cpclist_exclude) {
int j;
for (j = 0; NULL != cpclist_exclude[j]; ++j) {
if (0 != strcmp(cpclist_exclude[j], all[i]->bcf_name)) {
rc = cpc_specific_query(&namepriority_list, hca, i, &valid);
if (OMPI_ERROR == rc) {
return OMPI_ERROR;
}
}
}
} else {
rc = cpc_specific_query(&namepriority_list, hca, i, &valid);
if (OMPI_ERROR == rc) {
return OMPI_ERROR;
}
if (all[i] == cpc) {
return i;
}
}
if (0 == valid) {
BTL_ERROR(("Failed to find any valid connections for %s, not "
"using it for this run",
ibv_get_device_name(hca->ib_dev)));
return OMPI_ERROR;
}
*cpclist = opal_argv_join(namepriority_list, ',');
opal_argv_free(namepriority_list);
return OMPI_SUCCESS;
/* Not found */
return -1;
}
/*
* Lookup a CPC by its index (received from the modex)
*/
ompi_btl_openib_connect_base_component_t *
ompi_btl_openib_connect_base_get_cpc_byindex(uint8_t index)
{
return (index >= (sizeof(all) /
sizeof(ompi_btl_openib_connect_base_component_t *))) ?
NULL : all[index];
}
/*
* Called during openib btl component close
*/
void ompi_btl_openib_connect_base_finalize(void)
{
int i;
if (NULL != available) {
for (i = 0; NULL != available[i]; ++i) {
if (NULL != available[i]->cbc_finalize) {
available[i]->cbc_finalize();
}
}
free(available);
}
}

View file

@ -0,0 +1,46 @@
/*
* Copyright (c) 2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#include "connect/connect.h"
static void empty_component_register(void);
static int empty_component_init(void);
static int empty_component_query(mca_btl_openib_module_t *btl,
ompi_btl_openib_connect_base_module_t **cpc);
ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_empty = {
"empty",
empty_component_register,
empty_component_init,
empty_component_query,
NULL
};
static void empty_component_register(void)
{
/* Nothing to do */
}
static int empty_component_init(void)
{
/* Never let this CPC run */
return OMPI_ERR_NOT_SUPPORTED;
}
static int empty_component_query(mca_btl_openib_module_t *btl,
ompi_btl_openib_connect_base_module_t **cpc)
{
/* Never let this CPC run */
return OMPI_ERR_NOT_SUPPORTED;
}

View file

@ -0,0 +1,20 @@
/*
* Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BTL_OPENIB_CONNECT_EMPTY_H
#define BTL_OPENIB_CONNECT_EMPTY_H
#include "ompi_config.h"
#include "connect/connect.h"
extern ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_empty;
#endif

Diff between files not shown because of its large size. Load diff

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -15,6 +15,6 @@
#include "connect/connect.h"
extern ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_ibcm;
extern ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_ibcm;
#endif

View file

@ -22,13 +22,14 @@
#include "ompi_config.h"
#include "opal/dss/dss.h"
#include "opal/util/output.h"
#include "opal/util/error.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "opal/dss/dss.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/mca/dpm/dpm.h"
#include "btl_openib.h"
@ -43,13 +44,15 @@ typedef enum {
} connect_message_type_t;
static int oob_priority = 50;
static bool rml_recv_posted = false;
static void oob_open(void);
static int oob_init(void);
static int oob_start_connect(mca_btl_base_endpoint_t *e);
static int oob_query(mca_btl_openib_hca_t *hca);
static int oob_finalize(void);
static void oob_component_register(void);
static int oob_component_query(mca_btl_openib_module_t *openib_btl,
ompi_btl_openib_connect_base_module_t **cpc);
static int oob_component_finalize(void);
static int oob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
mca_btl_base_endpoint_t *endpoint);
static int reply_start_connect(mca_btl_openib_endpoint_t *endpoint,
mca_btl_openib_rem_info_t *rem_info);
static int set_remote_info(mca_btl_base_endpoint_t* endpoint,
@ -69,25 +72,23 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
void* cbdata);
/*
* The "module" struct -- the top-level function pointers for the oob
* connection scheme.
* The "component" struct -- the top-level function pointers for the
* oob connection scheme.
*/
ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_oob = {
ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_oob = {
"oob",
/* Open */
oob_open,
/* Register */
oob_component_register,
/* Init */
oob_init,
/* Connect */
oob_start_connect,
NULL,
/* Query */
oob_query,
oob_component_query,
/* Finalize */
oob_finalize,
oob_component_finalize
};
/* Register - this function sets up any oob-specific command line params */
static void oob_open(void)
static void oob_component_register(void)
{
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_oob_priority",
@ -105,16 +106,64 @@ static void oob_open(void)
* Query function. Check that this CPC can run on the given BTL module
* and post a (single, component-wide) non-blocking RML receive to
* accept incoming connection requests.
*/
static int oob_init(void)
static int oob_component_query(mca_btl_openib_module_t *btl,
ompi_btl_openib_connect_base_module_t **cpc)
{
int rc;
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
OMPI_RML_TAG_OPENIB,
ORTE_RML_PERSISTENT,
rml_recv_cb,
NULL);
return (ORTE_SUCCESS == rc) ? OMPI_SUCCESS : rc;
/* If we have the transport_type member, check to ensure we're on
IB (this CPC will not work with iWarp). If we do not have the
transport_type member, then we must be < OFED v1.2, and
therefore we must be IB. */
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
if (IBV_TRANSPORT_IB != btl->hca->ib_dev->transport_type) {
opal_output_verbose(5, mca_btl_base_output,
"openib BTL: oob CPC only supported on InfiniBand; skipped on device %s",
ibv_get_device_name(btl->hca->ib_dev));
return OMPI_ERR_NOT_SUPPORTED;
}
#endif
/* If this btl supports OOB, then post the RML message. But
ensure to only post it *once*, because another btl may have
come in before this and already posted it. */
if (!rml_recv_posted) {
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
OMPI_RML_TAG_OPENIB,
ORTE_RML_PERSISTENT,
rml_recv_cb,
NULL);
if (ORTE_SUCCESS != rc) {
opal_output_verbose(5, mca_btl_base_output,
"openib BTL: oob CPC system error %d (%s)",
rc, opal_strerror(rc));
return rc;
}
rml_recv_posted = true;
}
*cpc = malloc(sizeof(ompi_btl_openib_connect_base_module_t));
if (NULL == *cpc) {
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
rml_recv_posted = false;
opal_output_verbose(5, mca_btl_base_output,
"openib BTL: oob CPC system error (malloc failed)");
return OMPI_ERR_OUT_OF_RESOURCE;
}
(*cpc)->data.cbm_component = &ompi_btl_openib_connect_oob;
(*cpc)->data.cbm_priority = oob_priority;
(*cpc)->data.cbm_modex_message = NULL;
(*cpc)->data.cbm_modex_message_len = 0;
(*cpc)->cbm_endpoint_init = NULL;
(*cpc)->cbm_start_connect = oob_module_start_connect;
(*cpc)->cbm_endpoint_finalize = NULL;
(*cpc)->cbm_finalize = NULL;
opal_output_verbose(5, mca_btl_base_output,
"openib BTL: oob CPC available for use on %s",
ibv_get_device_name(btl->hca->ib_dev));
return OMPI_SUCCESS;
}
/*
@ -123,10 +172,11 @@ static int oob_init(void)
* communication mechanism. On completion of our send, a send
* completion handler is called.
*/
static int oob_start_connect(mca_btl_base_endpoint_t *endpoint)
static int oob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
mca_btl_base_endpoint_t *endpoint)
{
int rc;
if (OMPI_SUCCESS != (rc = qp_create_all(endpoint))) {
return rc;
}
@ -142,26 +192,16 @@ static int oob_start_connect(mca_btl_base_endpoint_t *endpoint)
return OMPI_SUCCESS;
}
static int oob_query(mca_btl_openib_hca_t *hca)
/*
* Component finalize function. Cleanup RML non-blocking receive.
*/
static int oob_component_finalize(void)
{
/* JMS need something better than this */
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
if (IBV_TRANSPORT_IB == hca->ib_dev->transport_type) {
return oob_priority;
if (rml_recv_posted) {
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
rml_recv_posted = false;
}
return -1;
#else
return oob_priority;
#endif
}
/*
* Finalize function. Cleanup RML non-blocking receive.
*/
static int oob_finalize(void)
{
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
return OMPI_SUCCESS;
}
@ -705,7 +745,8 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
if (master) {
rc = reply_start_connect(ib_endpoint, &rem_info);
} else {
rc = oob_start_connect(ib_endpoint);
rc = oob_module_start_connect(ib_endpoint->endpoint_local_cpc,
ib_endpoint);
}
if (OMPI_SUCCESS != rc) {

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -13,6 +13,6 @@
#include "connect/connect.h"
extern ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_oob;
extern ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_oob;
#endif

View file

@ -1,78 +0,0 @@
/*
* Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "btl_openib_endpoint.h"
#include "connect/connect.h"
static void rdma_cm_open(void);
static int rdma_cm_init(void);
static int rdma_cm_connect(mca_btl_base_endpoint_t *e);
static int rdma_cm_query(mca_btl_openib_hca_t *hca);
static int rdma_cm_finalize(void);
ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_rdma_cm = {
"rdma_cm",
rdma_cm_open,
rdma_cm_init,
rdma_cm_connect,
rdma_cm_query,
rdma_cm_finalize,
};
static int rdma_cm_priority = -1;
/* Open - this functions sets up any rdma_cm specific commandline params */
static void rdma_cm_open(void)
{
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_rdma_cm_priority",
"The selection method priority for rdma_cm",
false, false, rdma_cm_priority, &rdma_cm_priority);
if (rdma_cm_priority > 100) {
rdma_cm_priority = 100;
} else if (rdma_cm_priority < -1) {
rdma_cm_priority = -1;
}
}
static int rdma_cm_init(void)
{
BTL_ERROR(("rdma cm init"));
return OMPI_ERR_NOT_IMPLEMENTED;
}
static int rdma_cm_connect(mca_btl_base_endpoint_t *e)
{
BTL_ERROR(("rdma cm connect"));
return OMPI_ERR_NOT_IMPLEMENTED;
}
static int rdma_cm_query(mca_btl_openib_hca_t *hca)
{
/* JMS need something better than this */
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
if (IBV_TRANSPORT_IWARP == hca->ib_dev->transport_type) {
BTL_ERROR(("rdma cm Not currently supported"));
return rdma_cm_priority;
}
#endif
return -1;
}
static int rdma_cm_finalize(void)
{
BTL_ERROR(("rdma cm finalize"));
return OMPI_ERR_NOT_IMPLEMENTED;
}

View file

@ -1,20 +0,0 @@
/*
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BTL_OPENIB_CONNECT_RDMA_CM_H
#define BTL_OPENIB_CONNECT_RDMA_CM_H
#include "ompi_config.h"
#include "connect/connect.h"
extern ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_rdma_cm;
#endif

Diff between files not shown because of its large size. Load diff

View file

@ -0,0 +1,20 @@
/*
* Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BTL_OPENIB_CONNECT_RDMACM_H
#define BTL_OPENIB_CONNECT_RDMACM_H
#include "ompi_config.h"
#include "connect/connect.h"
extern ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_rdmacm;
#endif

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -10,10 +11,12 @@
#include "ompi_config.h"
#include "opal/dss/dss.h"
#include "opal/util/error.h"
#include "opal/util/output.h"
#include "orte/util/name_fns.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "opal/dss/dss.h"
#include "ompi/mca/dpm/dpm.h"
#include "btl_openib.h"
@ -23,28 +26,28 @@
#include "btl_openib_async.h"
#include "connect/connect.h"
static void xoob_open(void);
static int xoob_init(void);
static int xoob_start_connect(mca_btl_base_endpoint_t *e);
static int xoob_query(mca_btl_openib_hca_t *hca);
static int xoob_finalize(void);
static void xoob_component_register(void);
static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
ompi_btl_openib_connect_base_module_t **cpc);
static int xoob_component_finalize(void);
static int xoob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
mca_btl_base_endpoint_t *endpoint);
/*
* The "module" struct -- the top-level function pointers for the xoob
* connection scheme.
* The "component" struct -- the top-level function pointers for the
* xoob connection scheme.
*/
ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_xoob = {
ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_xoob = {
"xoob",
/* Open */
xoob_open,
/* Register */
xoob_component_register,
/* Init */
xoob_init,
/* Connect */
xoob_start_connect,
NULL,
/* Query */
xoob_query,
xoob_component_query,
/* Finalize */
xoob_finalize,
xoob_component_finalize
};
typedef enum {
@ -55,6 +58,8 @@ typedef enum {
ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE /* The xrc recv qp already was destroyed */
} connect_message_type_t;
static bool rml_recv_posted = false;
#define XOOB_SET_REMOTE_INFO(EP, INFO) \
do { \
/* copy the rem_info stuff */ \
@ -707,8 +712,8 @@ static void xoob_restart_connect(mca_btl_base_endpoint_t *endpoint)
" starting from scratch\n",
endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
/* xoob_start_connect() should automaticly handle all other cases */
if (OMPI_SUCCESS != xoob_start_connect(endpoint))
/* xoob_module_start_connect() should automatically handle all other cases */
if (OMPI_SUCCESS != xoob_module_start_connect(NULL, endpoint))
BTL_ERROR(("Failed to restart connection from MCA_BTL_IB_ADDR_CONNECTING/CLOSED"));
break;
default :
@ -912,17 +917,61 @@ static void xoob_rml_recv_cb(int status, orte_process_name_t* process_name,
*/
/* Query for the XOOB priority - will be highest in XRC case */
static int xoob_query(mca_btl_openib_hca_t *hca)
static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
ompi_btl_openib_connect_base_module_t **cpc)
{
if (mca_btl_openib_component.num_xrc_qps > 0) {
return xoob_priority;
int rc;
if (mca_btl_openib_component.num_xrc_qps <= 0) {
opal_output_verbose(5, mca_btl_base_output,
"openib BTL: xoob CPC only supported with XRC receive queues; skipped on device %s",
ibv_get_device_name(openib_btl->hca->ib_dev));
return OMPI_ERR_NOT_SUPPORTED;
}
return -1;
*cpc = malloc(sizeof(ompi_btl_openib_connect_base_module_t));
if (NULL == *cpc) {
opal_output_verbose(5, mca_btl_base_output,
"openib BTL: xoob CPC system error (malloc failed)");
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* If this btl supports XOOB, then post the RML message. But
ensure to only post it *once*, because another btl may have
come in before this and already posted it. */
if (!rml_recv_posted) {
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
OMPI_RML_TAG_XOPENIB,
ORTE_RML_PERSISTENT,
xoob_rml_recv_cb,
NULL);
if (ORTE_SUCCESS != rc) {
opal_output_verbose(5, mca_btl_base_output,
"openib BTL: xoob CPC system error %d (%s)",
rc, opal_strerror(rc));
free(*cpc);
*cpc = NULL;
return rc;
}
rml_recv_posted = true;
}
(*cpc)->data.cbm_component = &ompi_btl_openib_connect_xoob;
(*cpc)->data.cbm_priority = xoob_priority;
(*cpc)->data.cbm_modex_message = NULL;
(*cpc)->data.cbm_modex_message_len = 0;
(*cpc)->cbm_endpoint_init = NULL;
(*cpc)->cbm_start_connect = xoob_module_start_connect;
(*cpc)->cbm_endpoint_finalize = NULL;
(*cpc)->cbm_finalize = NULL;
opal_output_verbose(5, mca_btl_base_output,
"openib BTL: xoob CPC available for use on %s",
ibv_get_device_name(openib_btl->hca->ib_dev));
return OMPI_SUCCESS;
}
/* Register - this function sets up any xoob-specific command line params */
static void xoob_open(void)
static void xoob_component_register(void)
{
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_xoob_priority",
@ -936,29 +985,14 @@ static void xoob_open(void)
}
}
/*
* Init function. Post non-blocking RML receive to accept incoming
* connection requests.
*/
static int xoob_init(void)
{
int rc;
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
OMPI_RML_TAG_XOPENIB,
ORTE_RML_PERSISTENT,
xoob_rml_recv_cb,
NULL);
return (ORTE_SUCCESS == rc) ? OMPI_SUCCESS : rc;
}
/*
* Connect function. Start initiation of connections to a remote
* peer. We send our Queue Pair information over the RML/OOB
* communication mechanism. On completion of our send, a send
* completion handler is called.
*/
static int xoob_start_connect(mca_btl_base_endpoint_t *endpoint)
static int xoob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
mca_btl_base_endpoint_t *endpoint)
{
int rc = OMPI_SUCCESS;
@ -987,7 +1021,7 @@ static int xoob_start_connect(mca_btl_base_endpoint_t *endpoint)
" Subscribing to this address\n",
endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
/* somebody is already connecting to this machine, let's wait */
opal_list_append(&endpoint->ib_addr->pending_ep, (opal_list_item_t*)endpoint);
opal_list_append(&endpoint->ib_addr->pending_ep, &(endpoint->super));
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
break;
case MCA_BTL_IB_ADDR_CONNECTED:
@ -1014,8 +1048,11 @@ static int xoob_start_connect(mca_btl_base_endpoint_t *endpoint)
/*
* Finalize function. Cleanup RML non-blocking receive.
*/
static int xoob_finalize(void)
static int xoob_component_finalize(void)
{
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_XOPENIB);
if (rml_recv_posted) {
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_XOPENIB);
rml_recv_posted = false;
}
return OMPI_SUCCESS;
}

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -13,6 +14,6 @@
#include "connect/connect.h"
extern ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_xoob;
extern ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_xoob;
#endif

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -17,38 +17,118 @@
* friends are not used, but all the functionality is accessed through
* struct's of function pointers, so you can swap between multiple
* different implementations at run time, just like real components).
* Hence, these entities are referred to as "Connect
* Pseudo-Components" (CPCs).
*
* Currently, the connect functions are referenced by their names
* (e.g., "oob", "rdma_cm"). The decision which to use is made during
* the openib BTL init() function call.
* The CPCs are referenced by their names (e.g., "oob", "rdmacm").
*
* Note that the openib BTL's open() function calls the
* connect_base_open() function, which registers an MCA parameter, and
* scans all the connect modules to see if they have open() functions.
* If they do, they are called. In this way, the connect modules can
* register MCA parameters that show up in ompi_info output.
* CPCs are split into components and modules, similar to all other
* MCA frameworks in this code base.
*
* There are four main functions to this interface:
* Before diving into the CPC interface, let's discuss some
* terminology and mappings of data structures:
*
* - open: as described above, used to register MCA params for connect
* modules
* - a BTL module represents a network port (in the case of the openib
* BTL, a LID)
* - a CPC module represents one way to make connections to a BTL module
* - hence, a BTL module has potentially multiple CPC modules
* associated with it
* - an endpoint represents a connection between a local BTL module and
* a remote BTL module (in the openib BTL, because of BSRQ, an
* endpoint can contain multiple QPs)
* - when an endpoint is created, one of the CPC modules associated
* with the local BTL is selected and associated with the endpoint
* (obviously, it is a CPC module that is common between the local
* and remote BTL modules)
* - endpoints may be created and destroyed during the MPI job
* - endpoints are created lazily, during the first communication
* between two peers
* - endpoints are destroyed when two MPI processes become
* disconnected (e.g., MPI-2 dynamics or MPI_FINALIZE)
* - hence, BTL modules and CPC modules outlive endpoints.
* Specifically, BTL modules and CPC modules live from MPI_INIT to
* MPI_FINALIZE. endpoints come and go as MPI semantics demand it.
* - therefore, CPC modules need to cache information on endpoints that
* are specific to that connection.
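*
* For example (editor's illustration of the mapping above, with
* hypothetical ports): the BTL module for port 1 might end up with
* "oob" and "ibcm" CPC modules while the BTL module for port 2 has
* "oob" and "rdmacm"; an endpoint created over port 1 is then bound
* to exactly one CPC module that is common to the local and remote
* BTL modules (e.g., "oob").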
*
* - init: to select a connect module. The module is responsible for
* setting itself up for asynchronous operation for incoming
* connection requests (e.g., putting fd's in the progress engine,
* posting non-blocking RML requests, spawning a background thread,
* etc.).
* Component interface:
*
* - start_connect: initiate a connection to a remote peer. Similar
* to init, the module is responsible for setting itself up for
* asyncronous operation for progressing the outgoing connection
* request.
* - component_register(): The openib BTL's component_open() function
* calls the connect_base_register() function, which scans all
* compiled-in CPC's. If they have component_register() functions,
* they are called (component_register() functions are only allowed to
* register MCA parameters).
*
* - finalize: shut down all asynchronous handling. No need to clean
* up the connections that were made; that's the responsibility of the
* main openib BTL.
* NOTE: The connect_base_register() function will process the
* btl_openib_cpc_include and btl_openib_cpc_exclude MCA parameters
* and automatically include/exclude CPCs as relevant. If a CPC is
* excluded, none of its other interface functions will be invoked for
* the duration of the process.
*
* There are two functions in the main openib BTL that the module will
* - component_init(): The openib BTL's component_init() function
* calls connect_base_init(), which will invoke this query function on
* each CPC to see if it wants to run at all. CPCs can gracefully
* remove themselves from consideration in this process by returning
* OMPI_ERR_NOT_SUPPORTED.
*
* - component_query(): The openib BTL's init_one_port() calls the
* connect_base_select_for_local_port() function, which, for each LID
* on that port, calls the component_query() function on every
* available CPC on that LID. This function is intended to see if a
* CPC can run on a specific openib BTL module (i.e., LID). If it
* can, the CPC is supposed to create a CPC module that is specific to
* that BTL/LID and return it. If it cannot, it should return
* OMPI_ERR_NOT_SUPPORTED and be gracefully skipped for this
* OpenFabrics port.
*
* component_finalize(): The openib BTL's component_close() function
* calls connect_base_finalize(), which, in turn, calls the
* component_finalize() function on all available CPCs. Note that all
* CPC modules will have been finalized by this point; the CPC
* component_finalize() function is a chance for the CPC to clean up
* any component-specific resources.
*
* Module interface:
*
* cbm_component member: A pointer to the single, global
* instance of the CPC component. This member is used for creating a
* unique index representing the module's component so that it can be
* shared with remote peer processes.
*
* cbm_priority member: An integer between 0 and 100, inclusive,
* representing the priority of this CPC.
*
* cbm_modex_message member: A pointer to a blob buffer that will be
* included in the modex message for this port for this CPC (it is
* assumed that this blob is a) only understandable by the
* corresponding CPC in the peer process, and b) contains specific
* addressing/contact information for *this* port's CPC module).
*
* cbm_modex_message_len member: The length of the cbm_modex_message
* blob, in bytes.
*
* cbm_endpoint_init(): Called during endpoint creation, allowing a
* CPC module to cache information on the endpoint. A pointer to the
* endpoint's CPC module is already cached on the endpoint.
*
* cbm_start_connect(): initiate a connection to a remote peer. The
* CPC is responsible for setting itself up for asynchronous operation
* for progressing the outgoing connection request.
*
* cbm_endpoint_finalize(): Called during endpoint destruction,
* allowing the CPC module to destroy anything that it cached on the
* endpoint.
*
* cbm_finalize(): shut down all asynchronous handling and clean up
* any state that was setup for this CPC module/BTL. Some CPCs setup
* asynchronous support on a per-HCA/NIC basis (vs. per-port/LID). It
* is the responsibility of the CPC to figure out such issues (e.g.,
* via reference counting) -- there is no notification from the
* upper-level BTL about when an entire HCA/NIC is no longer being
* used. There is only this function, which tells when a specific
* CPC/BTL module is no longer being used.
*
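* As an editor's illustration (not normative), component_query()
* typically fills in the module it returns much like the oob CPC
* does; the my_* names are hypothetical:
*
*   (*cpc)->data.cbm_component = &ompi_btl_openib_connect_mycpc;
*   (*cpc)->data.cbm_priority = my_priority;          (0..100)
*   (*cpc)->data.cbm_modex_message = my_blob;         (may be NULL)
*   (*cpc)->data.cbm_modex_message_len = my_blob_len;
*   (*cpc)->cbm_endpoint_init = my_endpoint_init;         (may be NULL)
*   (*cpc)->cbm_start_connect = my_start_connect;
*   (*cpc)->cbm_endpoint_finalize = my_endpoint_finalize; (may be NULL)
*   (*cpc)->cbm_finalize = my_finalize;                   (may be NULL)
*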
* There are two functions in the main openib BTL that the CPC will
* call:
*
* - ompi_btl_openib_post_recvs(endpoint): once a QP is locally
@ -70,58 +150,187 @@ BEGIN_C_DECLS
#define BCF_MAX_NAME 64
/**
* Must forward declare mca_btl_openib_hca_t; it's defined in
* btl_openib.h, but that file includes this file.
* Must forward declare these structs to avoid include file loops.
*/
struct mca_btl_openib_hca_t;
struct mca_btl_openib_module_t;
struct mca_btl_base_endpoint_t;
/**
* Function to register MCA params in the connect functions
* This is struct is defined below
*/
typedef void (*ompi_btl_openib_connect_base_func_open_t)(void);
struct ompi_btl_openib_connect_base_module_t;
/************************************************************************/
/**
* Function to intialize the connection functions (i.e., it's been
* selected, so do whatever setup is necessary).
* Function to register MCA params in the connect functions. It
* returns no value, so it cannot fail.
*/
typedef int (*ompi_btl_openib_connect_base_func_init_t)(void);
typedef void (*ompi_btl_openib_connect_base_component_register_fn_t)(void);
/**
* Function to initiate a connection to a remote process
* This function is invoked once by the openib BTL component during
* startup. It is intended to have CPC component-wide startup.
*
* Return value:
*
* - OMPI_SUCCESS: this CPC component will be used in selection during
* this process.
*
* - OMPI_ERR_NOT_SUPPORTED: this CPC component will be silently
* ignored in this process.
*
* - Other OMPI_ERR_* values: the error will be propagated upwards,
* likely causing a fatal error (and/or the openib BTL component
* being ignored).
*/
typedef int (*ompi_btl_openib_connect_base_func_start_connect_t)
(struct mca_btl_base_endpoint_t *e);
typedef int (*ompi_btl_openib_connect_base_component_init_fn_t)(void);
/**
* Query the CPC to see if it wants to run on a specific HCA
* Query the CPC to see if it wants to run on a specific port (i.e., a
* specific BTL module). If the component init function previously
* returned OMPI_SUCCESS, this function is invoked once per BTL module
* creation (i.e., for each port found by an MPI process). If this
* CPC wants to be used on this BTL module, it returns a CPC module
* that is specific to this BTL module.
*
* The BTL module in question is passed to the function; all of its
* attributes can be used to query to see if it's eligible for this
* CPC.
*
* If it is eligible, the CPC is responsible for creating a
* corresponding CPC module, filling in all the relevant fields on the
* module, setting itself up to run (per above), and returning that
* CPC module (this is effectively the "module_init" function).
* Note that the module priority must be between 0 and 100
* (inclusive). When multiple CPCs are eligible for a single module,
* the CPC with the highest priority will be used.
*
* Return value:
*
* - OMPI_SUCCESS if this CPC is eligible for and was able to be set up
* for this BTL module. It is assumed that the CPC is now completely
* set up to run on this openib module (per description above).
*
* - OMPI_ERR_NOT_SUPPORTED if this CPC cannot support this BTL
* module. This is not an error; it's just the CPC saying "sorry, I
* cannot support this BTL module."
*
* - Other OMPI_ERR_* code: an error occurred.
*/
typedef int (*ompi_btl_openib_connect_base_func_query_t)(struct mca_btl_openib_hca_t *hca);
typedef int (*ompi_btl_openib_connect_base_func_component_query_t)
(struct mca_btl_openib_module_t *btl,
struct ompi_btl_openib_connect_base_module_t **cpc);
/**
* Function to finalize the connection functions
* This function is invoked once by the openib BTL component during
* shutdown. It is intended for CPC component-wide shutdown.
*/
typedef int (*ompi_btl_openib_connect_base_func_finalize_t)(void);
typedef int (*ompi_btl_openib_connect_base_component_finalize_fn_t)(void);
struct ompi_btl_openib_connect_base_funcs_t {
/**
* CPC component struct
*/
struct ompi_btl_openib_connect_base_component_t {
/** Name of this set of connection functions */
char bcf_name[BCF_MAX_NAME];
char cbc_name[BCF_MAX_NAME];
/** Open function */
ompi_btl_openib_connect_base_func_open_t bcf_open;
/** Register function. Can be NULL. */
ompi_btl_openib_connect_base_component_register_fn_t cbc_register;
/** Init function */
ompi_btl_openib_connect_base_func_init_t bcf_init;
/** CPC component init function. Can be NULL. */
ompi_btl_openib_connect_base_component_init_fn_t cbc_init;
/** Query the CPC component to get a CPC module corresponding to
an openib BTL module. Cannot be NULL. */
ompi_btl_openib_connect_base_func_component_query_t cbc_query;
/** CPC component finalize function. Can be NULL. */
ompi_btl_openib_connect_base_component_finalize_fn_t cbc_finalize;
};
/**
* Convenience typedef
*/
typedef struct ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_base_component_t;
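/* A hypothetical example of declaring a component with the struct
   above; all "example" symbols are invented, and the query function
   is sketched further below. */
static int example_component_query(struct mca_btl_openib_module_t *btl,
                                   struct ompi_btl_openib_connect_base_module_t **cpc);

ompi_btl_openib_connect_base_component_t example_component = {
    "example",                  /* cbc_name */
    example_component_register, /* cbc_register (may be NULL) */
    example_component_init,     /* cbc_init (may be NULL) */
    example_component_query,    /* cbc_query (required) */
    NULL                        /* cbc_finalize (may be NULL) */
};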
/************************************************************************/
/**
* Function called when an endpoint has been created and has been
* associated with a CPC.
*/
typedef int (*ompi_btl_openib_connect_base_module_endpoint_init_fn_t)
(struct mca_btl_base_endpoint_t *endpoint);
/**
* Function to initiate a connection to a remote process.
*/
typedef int (*ompi_btl_openib_connect_base_module_start_connect_fn_t)
(struct ompi_btl_openib_connect_base_module_t *cpc,
struct mca_btl_base_endpoint_t *endpoint);
/**
* Function called when an endpoint is being destroyed.
*/
typedef int (*ompi_btl_openib_connect_base_module_endpoint_finalize_fn_t)
(struct mca_btl_base_endpoint_t *endpoint);
/**
* Function to finalize the CPC module. It is called once when the
* CPC module's corresponding openib BTL module is being finalized.
*/
typedef int (*ompi_btl_openib_connect_base_module_finalize_fn_t)
(struct mca_btl_openib_module_t *btl,
struct ompi_btl_openib_connect_base_module_t *cpc);
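/* A hypothetical sketch of the per-HCA reference counting that a
   cbm_finalize implementation may need (see the cbm_finalize notes
   near the top of this file); for brevity it assumes a single HCA
   whose asynchronous support is shared by all of this CPC's modules. */
static int example_hca_users = 0;   /* incremented as modules are created */

static int example_module_finalize(struct mca_btl_openib_module_t *btl,
                                   struct ompi_btl_openib_connect_base_module_t *cpc)
{
    (void) btl;   /* unused in this sketch */
    if (0 == --example_hca_users) {
        /* Last module using this HCA: stop any shared async
           thread / fd handling here. */
    }
    free(cpc);
    return OMPI_SUCCESS;
}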
/**
* Meta data about a CPC module. This is in a standalone struct
* because it is used in both the CPC module struct and the
* openib_btl_proc_t struct to hold information received from the
* modex.
*/
typedef struct ompi_btl_openib_connect_base_module_data_t {
/** Pointer back to the component. Used by the base and openib
btl to calculate this module's index for the modex. */
ompi_btl_openib_connect_base_component_t *cbm_component;
/** Priority of the CPC module (must be >=0 and <=100) */
uint8_t cbm_priority;
/** Blob that the CPC wants to include in the openib modex message
for a specific port, or NULL if the CPC does not want to
include a message in the modex. */
void *cbm_modex_message;
/** Length of the cbm_modex_message blob (0 if
cbm_modex_message==NULL). The message is intended to be short
(because the size of the modex broadcast is a function of
sum(cbm_modex_message_len[i]) for
i=(0...total_num_ports_in_MPI_job) -- e.g., IBCM imposes its
own [very short] limits (per IBTA volume 1, chapter 12). */
uint8_t cbm_modex_message_len;
} ompi_btl_openib_connect_base_module_data_t;
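/* A hypothetical sketch of a CPC publishing a small address blob via
   the meta data struct above (assumes <stdlib.h> and <stdint.h>); the
   16-bit "example_addr" field is invented.  Keeping the blob tiny
   matters because every port's blob is included in the modex
   broadcast. */
static int example_fill_modex(ompi_btl_openib_connect_base_module_data_t *data,
                              uint16_t example_addr)
{
    uint16_t *msg = malloc(sizeof(*msg));
    if (NULL == msg) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    *msg = example_addr;
    data->cbm_modex_message = msg;
    data->cbm_modex_message_len = sizeof(*msg);
    return OMPI_SUCCESS;
}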
/**
* Struct for holding CPC module and associated meta data
*/
typedef struct ompi_btl_openib_connect_base_module_t {
/** Meta data about the module */
ompi_btl_openib_connect_base_module_data_t data;
/** Endpoint initialization function */
ompi_btl_openib_connect_base_module_endpoint_init_fn_t cbm_endpoint_init;
/** Connect function */
ompi_btl_openib_connect_base_func_start_connect_t bcf_start_connect;
ompi_btl_openib_connect_base_module_start_connect_fn_t cbm_start_connect;
/** Query function */
ompi_btl_openib_connect_base_func_query_t bcf_query;
/** Endpoint finalization function */
ompi_btl_openib_connect_base_module_endpoint_finalize_fn_t cbm_endpoint_finalize;
/** Finalize function */
ompi_btl_openib_connect_base_func_finalize_t bcf_finalize;
};
typedef struct ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_base_funcs_t;
/** Finalize the cpc module */
ompi_btl_openib_connect_base_module_finalize_fn_t cbm_finalize;
} ompi_btl_openib_connect_base_module_t;
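/* A hypothetical component query sketch tying the pieces together:
   decide eligibility against the given BTL module, then allocate and
   fill in a CPC module per the contract described above.  All
   "example" symbols are invented; the endpoint-level functions are
   assumed to be defined elsewhere in this hypothetical CPC. */
static int example_endpoint_init(struct mca_btl_base_endpoint_t *endpoint);
static int example_start_connect(struct ompi_btl_openib_connect_base_module_t *cpc,
                                 struct mca_btl_base_endpoint_t *endpoint);
static int example_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint);

static int example_component_query(struct mca_btl_openib_module_t *btl,
                                   struct ompi_btl_openib_connect_base_module_t **cpc)
{
    ompi_btl_openib_connect_base_module_t *m = malloc(sizeof(*m));
    if (NULL == m) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    m->data.cbm_component = &example_component;
    m->data.cbm_priority = example_priority;   /* must be 0..100 */
    m->data.cbm_modex_message = NULL;          /* nothing to publish */
    m->data.cbm_modex_message_len = 0;
    m->cbm_endpoint_init = example_endpoint_init;
    m->cbm_start_connect = example_start_connect;
    m->cbm_endpoint_finalize = example_endpoint_finalize;
    m->cbm_finalize = example_module_finalize;
    example_hca_users++;
    *cpc = m;
    return OMPI_SUCCESS;
}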
END_C_DECLS


@ -0,0 +1,29 @@
# -*- text -*-
#
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI's OpenFabrics IB CPC
# support.
#
[no cpcs for port]
No OpenFabrics connection scheme reported that it could be used on a
specific port. As such, the openib BTL (OpenFabrics support) will be
disabled for this port.
Host: %s
Device: %s
CPCs attempted: %s
#
[cpc name not found]
An invalid CPC name was specified via the btl_openib_cpc_%s MCA
parameter.
Host: %s
btl_openib_cpc_%s value: %s
Invalid name: %s
All possible valid names: %s
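The %s placeholders above are filled in by the caller at run time.  As
a hedged illustration (the variable names here are invented), the
openib BTL might emit the first message via the stock show-help
utility like this:

  opal_show_help("help-mpi-btl-openib-cpc-base.txt", "no cpcs for port",
                 true,          /* prepend the standard error header */
                 hostname,      /* Host: %s */
                 device_name,   /* Device: %s */
                 cpc_list);     /* CPCs attempted: %s */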


@ -0,0 +1,35 @@
# -*- text -*-
#
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI's OpenFabrics IB CM
# support (the openib BTL).
#
[ib_cm function error]
An InfiniBand Connection Manager (IBCM) function returned an unexpected
error. Your MPI job is likely to hang or crash; sorry.
Host: %s
Function: %s
Error: %d (%s)
#
[unhandled error]
The InfiniBand Connection Manager (IBCM) received an unexpected error
when it attempted to send a connection %s. Your MPI job is
likely to hang or crash; sorry.
Host: %s
Error code: %d
#
[timeout not found]
The InfiniBand Connection Manager (IBCM) timed out when sending a
connection %s, but Open MPI was unable to find a matching send
request. This should not happen. Your MPI job is likely to hang or
crash; sorry.
Host: %s


@ -0,0 +1,19 @@
# -*- text -*-
#
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI's OpenFabrics RDMA CM
# support (the openib BTL).
#
[no valid ip]
It appears that an OpenFabrics device does not have an IP address
associated with it. The OpenFabrics RDMA CM connection scheme
*requires* IP addresses to be set up in order to function properly.
Host: %s
Device: %s