1
1

Removing trailing white spaces in all the openib btl code.

This commit was SVN r24855.
Этот коммит содержится в:
Yevgeny Kliteynik 2011-07-04 14:00:41 +00:00
родитель 5cae33503d
Коммит 4fbe68dd86
33 изменённых файлов: 547 добавлений и 547 удалений

Просмотреть файл

@ -5,16 +5,16 @@
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
# $COPYRIGHT$
#
#
# Additional copyrights may follow
#
#
# $HEADER$
#
@ -116,10 +116,10 @@ endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_btl_openib_la_SOURCES = $(component_sources)
mca_btl_openib_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS)
mca_btl_openib_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS)
mca_btl_openib_la_LIBADD = $(btl_openib_LIBS)
noinst_LTLIBRARIES = $(lib)
libmca_btl_openib_la_SOURCES = $(lib_sources)
libmca_btl_openib_la_LDFLAGS= -module -avoid-version $(btl_openib_LDFLAGS)
libmca_btl_openib_la_LDFLAGS= -module -avoid-version $(btl_openib_LDFLAGS)
libmca_btl_openib_la_LIBADD = $(btl_openib_LIBS)

Просмотреть файл

@ -413,8 +413,8 @@ static int mca_btl_openib_size_queues(struct mca_btl_openib_module_t* openib_btl
goto out;
}
if (0 == openib_btl->num_peers &&
(mca_btl_openib_component.num_srq_qps > 0 ||
if (0 == openib_btl->num_peers &&
(mca_btl_openib_component.num_srq_qps > 0 ||
mca_btl_openib_component.num_xrc_qps > 0)) {
rc = create_srq(openib_btl);
}
@ -426,7 +426,7 @@ out:
mca_btl_openib_transport_type_t mca_btl_openib_get_transport_type(mca_btl_openib_module_t* openib_btl)
{
/* If we have a driver with RDMAoE supporting as the device struct contains the same type (IB) for
/* If we have a driver with RDMAoE supporting as the device struct contains the same type (IB) for
IBV_LINK_LAYER_INFINIBAND and IBV_LINK_LAYER_ETHERNET link layers and the single way
to detect this fact is to check their link_layer fields in a port_attr struct.
If our driver doesn't support this feature => the checking of transport type in device struct will be enough.
@ -455,7 +455,7 @@ mca_btl_openib_transport_type_t mca_btl_openib_get_transport_type(mca_btl_openib
case IBV_TRANSPORT_IWARP:
return MCA_BTL_OPENIB_TRANSPORT_IWARP;
case IBV_TRANSPORT_UNKNOWN:
case IBV_TRANSPORT_UNKNOWN:
default:
return MCA_BTL_OPENIB_TRANSPORT_UNKNOWN;
}
@ -464,7 +464,7 @@ mca_btl_openib_transport_type_t mca_btl_openib_get_transport_type(mca_btl_openib
#endif
}
static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
mca_btl_base_endpoint_t* endpoint)
{
int ret = OMPI_SUCCESS;
@ -485,7 +485,7 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
endpoint->rem_info.rem_vendor_id,
endpoint->rem_info.rem_vendor_part_id,
mca_btl_openib_transport_name_strings[endpoint->rem_info.rem_transport_type]);
return OMPI_ERROR;
}
@ -503,7 +503,7 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
}
if(openib_btl->device->mtu < endpoint->rem_info.rem_mtu) {
endpoint->rem_info.rem_mtu = openib_btl->device->mtu;
endpoint->rem_info.rem_mtu = openib_btl->device->mtu;
}
endpoint->use_eager_rdma = openib_btl->device->use_eager_rdma &
@ -520,11 +520,11 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
case BTL_OPENIB_RQ_SOURCE_MAX:
break;
/* If the queues configuration was set from the command line
/* If the queues configuration was set from the command line
(with the --mca btl_openib_receive_queues parameter) => both sides have the same configuration */
/* In this case the local queues configuration was obtained from the INI file =>
it is not possible that the remote side got its queues configuration from the command line =>
it is not possible that the remote side got its queues configuration from the command line =>
(by priority) the configuration was set from the INI file or (if not configured)
by the default queues configuration */
case BTL_OPENIB_RQ_SOURCE_DEVICE_INI:
@ -552,7 +552,7 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
}
break;
/* If the local queues configuration was set
/* If the local queues configuration was set
by default queues => check all possible cases for remote side and compare */
case BTL_OPENIB_RQ_SOURCE_DEFAULT:
if(NULL != values.receive_queues) {
@ -628,7 +628,7 @@ int mca_btl_openib_add_procs(
opal_output(-1, "add procs: adding proc %d", i);
/* OOB, XOOB, RDMACM, IBCM do not support SELF communication, so
/* OOB, XOOB, RDMACM, IBCM do not support SELF communication, so
* mark the proc as unreachable by openib btl */
if (OPAL_EQUAL == orte_util_compare_name_fields
(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, &ompi_proc->proc_name)) {
@ -705,7 +705,7 @@ int mca_btl_openib_add_procs(
on the peer has a matching CPC. */
assert(btl_rank <= ib_proc->proc_port_count);
assert(remote_matching_port != -1);
if (OMPI_SUCCESS !=
if (OMPI_SUCCESS !=
ompi_btl_openib_connect_base_find_match(openib_btl,
&(ib_proc->proc_ports[remote_matching_port]),
&local_cpc,
@ -751,8 +751,8 @@ int mca_btl_openib_add_procs(
}
}
#endif
mca_btl_openib_endpoint_init(openib_btl, endpoint,
local_cpc,
mca_btl_openib_endpoint_init(openib_btl, endpoint,
local_cpc,
&(ib_proc->proc_ports[remote_matching_port]),
remote_cpc_data);
@ -1139,7 +1139,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
BTL_VERBOSE(("frag->sg_entry.lkey = %" PRIu32 " .addr = %" PRIx64
" frag->segment.seg_key.key32[0] = %" PRIu32,
frag->sg_entry.lkey,
frag->sg_entry.lkey,
frag->sg_entry.addr,
frag->sg_entry.lkey));
@ -1260,7 +1260,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
BTL_VERBOSE(("frag->sg_entry.lkey = %" PRIu32 " .addr = %" PRIx64 " "
"frag->segment.seg_key.key32[0] = %" PRIu32,
frag->sg_entry.lkey,
frag->sg_entry.lkey,
frag->sg_entry.addr,
openib_reg->mr->rkey));
@ -1317,7 +1317,7 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
opal_hash_table_t *srq_addr_table =
&mca_btl_openib_component.srq_manager.srq_addr_table;
opal_mutex_lock(lock);
opal_mutex_lock(lock);
if (OPAL_SUCCESS !=
opal_hash_table_remove_value_ptr(srq_addr_table,
&openib_btl->qps[qp].u.srq_qp.srq,
@ -1353,7 +1353,7 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
}
if (NULL != openib_btl->qps) {
free(openib_btl->qps);
free(openib_btl->qps);
}
return rc;
@ -1399,7 +1399,7 @@ int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
/*
* Send immediate - Minimum function calls minimum checks, send the data ASAP.
* If the BTL can't send the message immediately, it creates a message descriptor
* If the BTL can't send the message immediately, it creates a message descriptor
* and returns it to the PML.
*/
int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl,
@ -1411,14 +1411,14 @@ int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl,
uint8_t order,
uint32_t flags,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t** descriptor)
mca_btl_base_descriptor_t** descriptor)
{
mca_btl_openib_module_t *obtl = (mca_btl_openib_module_t*)btl;
size_t size = payload_size + header_size;
size_t eager_limit;
int rc,
qp = frag_size_to_order(obtl, size),
prio = !(flags & MCA_BTL_DES_FLAGS_PRIORITY),
int rc,
qp = frag_size_to_order(obtl, size),
prio = !(flags & MCA_BTL_DES_FLAGS_PRIORITY),
ib_rc;
int32_t cm_return;
bool do_rdma = false;

Просмотреть файл

@ -142,7 +142,7 @@ typedef enum {
/* The structure for managing all BTL SRQs */
typedef struct mca_btl_openib_srq_manager_t {
opal_mutex_t lock;
/* The keys of this hash table are addresses of
/* The keys of this hash table are addresses of
SRQ structures, and the elements are pointers to the BTL modules
that are associated with these SRQs */
opal_hash_table_t srq_addr_table;
@ -405,7 +405,7 @@ struct mca_btl_openib_module_srq_qp_t {
/** We post additional WQEs only if a number of WQEs (in specific SRQ) is less of this value.
The value increased together with rd_curr_num. The value is unique for every SRQ. */
int32_t rd_low_local;
/** The flag indicates whether we want to get the
/** The flag indicates whether we want to get the
IBV_EVENT_SRQ_LIMIT_REACHED events for dynamically resizing the SRQ */
bool srq_limit_event_flag;
/**< In difference of the "--mca enable_srq_resize" parameter that says, if we want(or no)
@ -580,7 +580,7 @@ extern int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl,
uint32_t flags,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t** descriptor
);
);
/**
* PML->BTL Initiate a put of the specified size.

Просмотреть файл

@ -141,7 +141,7 @@ static int btl_openib_async_poll_init(struct mca_btl_openib_async_poll *devices_
}
/* Send command completion to main thread */
static int send_command_comp(int in)
static int send_command_comp(int in)
{
if (write(mca_btl_openib_component.async_comp_pipe[1], &in, sizeof(int)) < 0) {
BTL_ERROR(("Write failed [%d]",errno));
@ -227,7 +227,7 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p
return OMPI_SUCCESS;
}
/* The main idea of resizing SRQ algorithm -
/* The main idea of resizing SRQ algorithm -
We create a SRQ with size = rd_num, but for efficient usage of resources
the number of WQEs that we post = rd_curr_num < rd_num and this value is
increased (by needs) in IBV_EVENT_SRQ_LIMIT_REACHED event handler (i.e. in this function),
@ -327,7 +327,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
BTL_ERROR(("Alternative path migration event reported"));
if (APM_ENABLED) {
BTL_ERROR(("Trying to find additional path..."));
if (!xrc_event)
if (!xrc_event)
mca_btl_openib_load_apm(event.element.qp,
qp2endpoint(event.element.qp, device));
#if HAVE_XRC
@ -472,7 +472,7 @@ void* btl_openib_async_thread(void * async)
return PTHREAD_CANCELED;
}
int btl_openib_async_command_done(int exp)
int btl_openib_async_command_done(int exp)
{
int comp;
if (read(mca_btl_openib_component.async_comp_pipe[0], &comp,

Просмотреть файл

@ -284,27 +284,27 @@ static int btl_openib_modex_send(void)
* c. a uint8_t indicating the length of the blob to follow
* d. a blob that is only meaningful to that CPC
*/
msg_size =
msg_size =
/* uint8_t for number of modules in the message */
1 +
/* For each module: */
mca_btl_openib_component.ib_num_btls *
mca_btl_openib_component.ib_num_btls *
(
/* Common module data */
modex_message_size +
modex_message_size +
/* uint8_t for how many CPCs follow */
1
);
/* For each module, add in the size of the per-CPC data */
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
for (j = 0;
for (j = 0;
j < mca_btl_openib_component.openib_btls[i]->num_cpcs;
++j) {
msg_size +=
msg_size +=
/* uint8_t for the index of the CPC */
1 +
/* uint8_t for the CPC's priority */
1 +
1 +
/* uint8_t for the blob length */
1 +
/* blob length */
@ -337,15 +337,15 @@ static int btl_openib_modex_send(void)
(mca_btl_openib_component.openib_btls[i]->port_info).transport_type =
mca_btl_openib_get_transport_type(mca_btl_openib_component.openib_btls[i]);
memcpy(offset,
&(mca_btl_openib_component.openib_btls[i]->port_info),
memcpy(offset,
&(mca_btl_openib_component.openib_btls[i]->port_info),
size);
opal_output(-1, "modex packed btl port modex message: 0x%" PRIx64 ", %d, %d (size: %d)",
mca_btl_openib_component.openib_btls[i]->port_info.subnet_id,
mca_btl_openib_component.openib_btls[i]->port_info.mtu,
mca_btl_openib_component.openib_btls[i]->port_info.lid,
(int) size);
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
MCA_BTL_OPENIB_MODEX_MSG_HTON(*(mca_btl_openib_modex_message_t *)offset);
#endif
@ -354,20 +354,20 @@ static int btl_openib_modex_send(void)
i, (int) (offset -message));
/* Pack the number of CPCs that follow */
pack8(&offset,
pack8(&offset,
mca_btl_openib_component.openib_btls[i]->num_cpcs);
opal_output(-1, "modex packed btl %d: to pack %d cpcs (packed: %d, offset now %d)",
i, mca_btl_openib_component.openib_btls[i]->num_cpcs,
*((uint8_t*) (offset - 1)), (int) (offset-message));
/* Pack each CPC */
for (j = 0;
for (j = 0;
j < mca_btl_openib_component.openib_btls[i]->num_cpcs;
++j) {
uint8_t u8;
cpc = mca_btl_openib_component.openib_btls[i]->cpcs[j];
opal_output(-1, "modex packed btl %d: packing cpc %s",
opal_output(-1, "modex packed btl %d: packing cpc %s",
i, cpc->data.cbm_component->cbc_name);
/* Pack the CPC index */
u8 = ompi_btl_openib_connect_base_get_cpc_index(cpc->data.cbm_component);
@ -434,7 +434,7 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
case MCA_BTL_OPENIB_CONTROL_RDMA:
rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)ctl_hdr;
BTL_VERBOSE(("prior to NTOH received rkey %" PRIu32
BTL_VERBOSE(("prior to NTOH received rkey %" PRIu32
", rdma_start.lval %" PRIx64 ", pval %p, ival %" PRIu32,
rdma_hdr->rkey,
rdma_hdr->rdma_start.lval,
@ -446,7 +446,7 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
BTL_OPENIB_EAGER_RDMA_CONTROL_HEADER_NTOH(*rdma_hdr);
}
BTL_VERBOSE(("received rkey %" PRIu32
BTL_VERBOSE(("received rkey %" PRIu32
", rdma_start.lval %" PRIx64 ", pval %p,"
" ival %" PRIu32, rdma_hdr->rkey,
rdma_hdr->rdma_start.lval,
@ -633,7 +633,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
BTL_VERBOSE(("my iWARP subnet_id is %016" PRIx64, subnet_id));
} else {
memset(&gid, 0, sizeof(gid));
if (0 != ibv_query_gid(device->ib_dev_context, port_num,
if (0 != ibv_query_gid(device->ib_dev_context, port_num,
mca_btl_openib_component.gid_index, &gid)) {
BTL_ERROR(("ibv_query_gid failed (%s:%d, %d)\n",
ibv_get_device_name(device->ib_dev), port_num,
@ -643,7 +643,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
#ifdef OMPI_HAVE_RDMAOE
if (IBV_LINK_LAYER_ETHERNET == ib_port_attr->link_layer) {
subnet_id = mca_btl_openib_get_ip_subnet_id(device->ib_dev,
subnet_id = mca_btl_openib_get_ip_subnet_id(device->ib_dev,
port_num);
} else {
subnet_id = ntoh64(gid.global.subnet_prefix);
@ -652,11 +652,11 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
subnet_id = ntoh64(gid.global.subnet_prefix);
#endif
BTL_VERBOSE(("my IB subnet_id for HCA %s port %d is %016" PRIx64,
BTL_VERBOSE(("my IB subnet_id for HCA %s port %d is %016" PRIx64,
ibv_get_device_name(device->ib_dev), port_num, subnet_id));
}
#else
if (0 != ibv_query_gid(device->ib_dev_context, port_num,
if (0 != ibv_query_gid(device->ib_dev_context, port_num,
mca_btl_openib_component.gid_index, &gid)) {
BTL_ERROR(("ibv_query_gid failed (%s:%d, %d)\n",
ibv_get_device_name(device->ib_dev), port_num,
@ -664,7 +664,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
return OMPI_ERR_NOT_FOUND;
}
subnet_id = ntoh64(gid.global.subnet_prefix);
BTL_VERBOSE(("my IB-only subnet_id for HCA %s port %d is %016" PRIx64,
BTL_VERBOSE(("my IB-only subnet_id for HCA %s port %d is %016" PRIx64,
ibv_get_device_name(device->ib_dev), port_num, subnet_id));
#endif
@ -868,9 +868,9 @@ static void device_construct(mca_btl_openib_device_t *device)
#endif
device->qps = NULL;
#if OPAL_HAVE_THREADS
mca_btl_openib_component.async_pipe[0] =
mca_btl_openib_component.async_pipe[0] =
mca_btl_openib_component.async_pipe[1] = -1;
mca_btl_openib_component.async_comp_pipe[0] =
mca_btl_openib_component.async_comp_pipe[0] =
mca_btl_openib_component.async_comp_pipe[1] = -1;
#endif
OBJ_CONSTRUCT(&device->device_lock, opal_mutex_t);
@ -910,7 +910,7 @@ static void device_destruct(mca_btl_openib_device_t *device)
/* wait for ok from thread */
if (OMPI_SUCCESS != btl_openib_async_command_done(device_to_remove)){
goto device_error;
}
}
}
#endif
@ -1006,10 +1006,10 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
return OMPI_ERROR;
}
/* wait for ok from thread */
if (OMPI_SUCCESS !=
if (OMPI_SUCCESS !=
btl_openib_async_command_done(device->ib_dev_context->async_fd)) {
return OMPI_ERROR;
}
}
}
#if OMPI_ENABLE_PROGRESS_THREADS == 1
/* Prepare data for thread, but not starting it */
@ -1338,7 +1338,7 @@ static int setup_qps(void)
if (0 == opal_argv_count(queues)) {
orte_show_help("help-mpi-btl-openib.txt",
"no qps in receive_queues", true,
orte_process_info.nodename,
orte_process_info.nodename,
mca_btl_openib_component.receive_queues);
ret = OMPI_ERROR;
goto error;
@ -1357,7 +1357,7 @@ static int setup_qps(void)
num_xrc_qps++;
#else
orte_show_help("help-mpi-btl-openib.txt", "No XRC support", true,
orte_process_info.nodename,
orte_process_info.nodename,
mca_btl_openib_component.receive_queues);
ret = OMPI_ERR_NOT_AVAILABLE;
goto error;
@ -1365,7 +1365,7 @@ static int setup_qps(void)
} else {
orte_show_help("help-mpi-btl-openib.txt",
"invalid qp type in receive_queues", true,
orte_process_info.nodename,
orte_process_info.nodename,
mca_btl_openib_component.receive_queues,
queues[qp]);
ret = OMPI_ERR_BAD_PARAM;
@ -1377,7 +1377,7 @@ static int setup_qps(void)
and SRQ */
if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
orte_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true,
orte_process_info.nodename,
orte_process_info.nodename,
mca_btl_openib_component.receive_queues);
ret = OMPI_ERR_BAD_PARAM;
goto error;
@ -1385,8 +1385,8 @@ static int setup_qps(void)
/* The current XRC implementation can't be used with btls_per_lid > 1 */
if (num_xrc_qps > 0 && mca_btl_openib_component.btls_per_lid > 1) {
orte_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID",
true, orte_process_info.nodename,
orte_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID",
true, orte_process_info.nodename,
mca_btl_openib_component.receive_queues, num_xrc_qps);
ret = OMPI_ERR_BAD_PARAM;
goto error;
@ -1676,13 +1676,13 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
ports and QPs on this device */
need_search = false;
if(-2 != mca_btl_openib_component.ib_max_inline_data) {
/* User has explicitly set btl_openib_max_inline_data MCA parameter
/* User has explicitly set btl_openib_max_inline_data MCA parameter
Per setup in _mca.c, we know that the MCA param value is guaranteed
to be >= -1 */
if (-1 == mca_btl_openib_component.ib_max_inline_data) {
need_search = true;
} else {
device->max_inline_data = (uint32_t)
device->max_inline_data = (uint32_t)
mca_btl_openib_component.ib_max_inline_data;
}
} else if (values.max_inline_data_set) {
@ -1691,20 +1691,20 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
} else if (values.max_inline_data >= 0) {
device->max_inline_data = (uint32_t) values.max_inline_data;
} else {
if(default_values.max_inline_data_set &&
if(default_values.max_inline_data_set &&
default_values.max_inline_data >= -1) {
BTL_ERROR(("Invalid max_inline_data value specified "
"in INI file (%d); using default value (%d)",
values.max_inline_data,
"in INI file (%d); using default value (%d)",
values.max_inline_data,
default_values.max_inline_data));
device->max_inline_data = (uint32_t)
device->max_inline_data = (uint32_t)
default_values.max_inline_data;
} else {
BTL_ERROR(("Invalid max_inline_data value specified "
"in INI file (%d)", values.max_inline_data));
ret = OMPI_ERR_BAD_PARAM;
goto error;
}
}
}
}
/* Horrible. :-( Per the thread starting here:
@ -1729,7 +1729,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
orte_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
true, orte_process_info.nodename,
__FILE__, __LINE__, "ibv_create_cq",
strerror(errno), errno,
strerror(errno), errno,
ibv_get_device_name(device->ib_dev));
ret = OMPI_ERR_NOT_AVAILABLE;
goto error;
@ -1749,7 +1749,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
that's good enough */
init_attr.cap.max_inline_data = max_inline_data = 1 << 20;
while (max_inline_data > 0) {
qp = ibv_create_qp(device->ib_pd, &init_attr);
qp = ibv_create_qp(device->ib_pd, &init_attr);
if (NULL != qp) {
break;
}
@ -1779,7 +1779,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
/* Eager RDMA is not currently supported with progress threads */
if (device->use_eager_rdma && OMPI_ENABLE_PROGRESS_THREADS) {
device->use_eager_rdma = 0;
orte_show_help("help-mpi-btl-openib.txt",
orte_show_help("help-mpi-btl-openib.txt",
"eager RDMA and progress threads", true);
}
@ -1896,7 +1896,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
modules in this process. This is an unfortunate artifact
of the openib BTL startup sequence (see below for more
details). The first device will choose the receive_queues
value from: (in priority order):
value from: (in priority order):
1. If the btl_openib_receive_queues MCA param was
specified, use that.
@ -1991,7 +1991,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
shares one of the problems cited in case 8, below. So
we need to fail this scenario; print an error and
abort.
Case 8: one INI value, different than default
- MCA parameter: not specified
- default receive_queues: value A
@ -2036,7 +2036,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
Server 2:
HCA B: no receive_queues in INI file
HCA C: receive_queues specified in INI file
A will therefore use the default receive_queues
value. B and C will use C's INI receive_queues.
But note that modex [currently] only sends around
@ -2078,7 +2078,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
- device 1: receive_queues value B in INI file
- device 2: receive_queues value B in INI file
--> per case 8, fail with a show_help message.
Case 10: two devices with different INI values
- MCA parameter: not specified
- default receive_queues: value A
@ -2121,7 +2121,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
in the INI, then it must agree with
component.receive_queues. */
if (NULL != values.receive_queues) {
if (0 != strcmp(values.receive_queues,
if (0 != strcmp(values.receive_queues,
mca_btl_openib_component.receive_queues)) {
orte_show_help("help-mpi-btl-openib.txt",
"locally conflicting receive_queues", true,
@ -2187,7 +2187,7 @@ error:
if (OMPI_SUCCESS != ret) {
orte_show_help("help-mpi-btl-openib.txt",
"error in device init", true,
"error in device init", true,
orte_process_info.nodename,
ibv_get_device_name(device->ib_dev));
}
@ -2453,7 +2453,7 @@ btl_openib_component_init(int *num_btl_modules,
support, so the following test is [currently] good enough... */
value = opal_mem_hooks_support_level();
#if !OPAL_HAVE_THREADS
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
orte_show_help("help-mpi-btl-openib.txt",
"ptmalloc2 with no threads", true,
@ -2468,7 +2468,7 @@ btl_openib_component_init(int *num_btl_modules,
We have a memory manager if we have both FREE and MUNMAP
support */
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
ret = 0;
index = mca_base_param_find("mpi", NULL, "leave_pinned");
@ -2481,7 +2481,7 @@ btl_openib_component_init(int *num_btl_modules,
index = mca_base_param_find("mpi", NULL, "leave_pinned_pipeline");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
OPAL_SUCCESS == mca_base_param_lookup_source(index, &source,
OPAL_SUCCESS == mca_base_param_lookup_source(index, &source,
NULL)) {
if (0 == value && MCA_BASE_PARAM_SOURCE_DEFAULT == source) {
++ret;
@ -2498,10 +2498,10 @@ btl_openib_component_init(int *num_btl_modules,
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_source(index, &source,
NULL)) {
if (-1 == mca_btl_openib_component.ib_max_inline_data &&
if (-1 == mca_btl_openib_component.ib_max_inline_data &&
MCA_BASE_PARAM_SOURCE_DEFAULT == source) {
/* If the user has not explicitly set this MCA parameter
use max_inline_data value specified in the
use max_inline_data value specified in the
device-specific parameters INI file */
mca_btl_openib_component.ib_max_inline_data = -2;
}
@ -2644,7 +2644,7 @@ btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_component.async_thread = 0;
#endif
distance = dev_sorted[0].distance;
for (found = false, i = 0;
for (found = false, i = 0;
i < num_devs && (-1 == mca_btl_openib_component.ib_max_btls ||
mca_btl_openib_component.ib_num_btls <
mca_btl_openib_component.ib_max_btls); i++) {
@ -2740,7 +2740,7 @@ btl_openib_component_init(int *num_btl_modules,
}
#endif
/* For XRC:
/* For XRC:
* from this point we know if MCA_BTL_XRC_ENABLED is true or false */
/* Init XRC IB Addr hash table */
@ -2807,11 +2807,11 @@ btl_openib_component_init(int *num_btl_modules,
base device that doesn't have device->qps setup on it yet (remember
that some modules may share the same device, so when going through
to loop, we may hit a device that was already setup earlier in
the loop).
the loop).
We may call prepare_device_for_use() only after adding the btl
to mca_btl_openib_component.openib_btls, since prepare_device_for_use()
adds the device to the async thread, which requires access to
to mca_btl_openib_component.openib_btls, since prepare_device_for_use()
adds the device to the async thread, which requires access to
mca_btl_openib_component.openib_btls.
*/
@ -2829,7 +2829,7 @@ btl_openib_component_init(int *num_btl_modules,
ret = prepare_device_for_use(device);
if (OMPI_SUCCESS != ret) {
orte_show_help("help-mpi-btl-openib.txt",
"error in device init", true,
"error in device init", true,
orte_process_info.nodename,
ibv_get_device_name(device->ib_dev));
goto no_btls;
@ -2856,12 +2856,12 @@ btl_openib_component_init(int *num_btl_modules,
opal_argv_free(mca_btl_openib_component.if_exclude_list);
mca_btl_openib_component.if_exclude_list = NULL;
}
/* setup the fork warning message as we are sensitive
* to memory corruption issues when fork is called
*/
ompi_warn_fork();
return btls;
no_btls:
@ -2923,7 +2923,7 @@ static int progress_no_credits_pending_frags(mca_btl_base_endpoint_t *ep)
retransmit for us).
*/
for (len = opal_list_get_size(&ep->qps[qp].no_credits_pending_frags[pri]);
len > 0 &&
len > 0 &&
(ep->eager_rdma_remote.tokens > 0 ||
ep->qps[qp].u.pp_qp.sd_credits > 0 ||
!BTL_OPENIB_QP_TYPE_PP(qp)); --len) {
@ -3095,7 +3095,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
if (rcredits + credits > 0) {
int rc;
if (OMPI_SUCCESS !=
if (OMPI_SUCCESS !=
(rc = progress_no_credits_pending_frags(ep))) {
return rc;
}
@ -3363,7 +3363,7 @@ error:
* SQ and RQ WRs will automatically be flushed.
*/
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
if (IBV_WC_WR_FLUSH_ERR == wc->status &&
if (IBV_WC_WR_FLUSH_ERR == wc->status &&
IBV_TRANSPORT_IWARP == device->ib_dev->transport_type) {
return;
}
@ -3373,47 +3373,47 @@ error:
BTL_PEER_ERROR(remote_proc, ("error polling %s with status %s "
"status number %d for wr_id %" PRIx64 " opcode %d vendor error %d qp_idx %d",
cq_name[cq], btl_openib_component_status_to_string(wc->status),
wc->status, wc->wr_id,
wc->status, wc->wr_id,
wc->opcode, wc->vendor_err, qp));
orte_notifier.log_peer(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
remote_proc ? &remote_proc->proc_name : NULL,
"\n\tIB polling %s with status %s "
"status number %d for wr_id %llu opcode %d vendor error %d qp_idx %d",
cq_name[cq], btl_openib_component_status_to_string(wc->status),
wc->status, wc->wr_id,
wc->status, wc->wr_id,
wc->opcode, wc->vendor_err, qp);
}
if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status ||
IBV_WC_RETRY_EXC_ERR == wc->status) {
char *peer_hostname =
char *peer_hostname =
(NULL != endpoint->endpoint_proc->proc_ompi->proc_hostname) ?
endpoint->endpoint_proc->proc_ompi->proc_hostname :
endpoint->endpoint_proc->proc_ompi->proc_hostname :
"<unknown -- please run with mpi_keep_peer_hostnames=1>";
const char *device_name =
const char *device_name =
ibv_get_device_name(endpoint->qps[qp].qp->lcl_qp->context->device);
if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status) {
orte_show_help("help-mpi-btl-openib.txt",
BTL_OPENIB_QP_TYPE_PP(qp) ?
"pp rnr retry exceeded" :
BTL_OPENIB_QP_TYPE_PP(qp) ?
"pp rnr retry exceeded" :
"srq rnr retry exceeded", true,
orte_process_info.nodename, device_name,
peer_hostname);
orte_notifier.show_help(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
"help-mpi-btl-openib.txt",
BTL_OPENIB_QP_TYPE_PP(qp) ?
"pp rnr retry exceeded" :
BTL_OPENIB_QP_TYPE_PP(qp) ?
"pp rnr retry exceeded" :
"srq rnr retry exceeded",
orte_process_info.nodename, device_name,
peer_hostname);
} else if (IBV_WC_RETRY_EXC_ERR == wc->status) {
orte_show_help("help-mpi-btl-openib.txt",
orte_show_help("help-mpi-btl-openib.txt",
"pp retry exceeded", true,
orte_process_info.nodename,
device_name, peer_hostname);
orte_notifier.show_help(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
"help-mpi-btl-openib.txt",
"help-mpi-btl-openib.txt",
"pp retry exceeded",
orte_process_info.nodename,
device_name, peer_hostname);

Просмотреть файл

@ -310,7 +310,7 @@ void mca_btl_openib_endpoint_init(mca_btl_openib_module_t *btl,
ep->rem_info.rem_lid = remote_proc_info->pm_port_info.lid;
ep->rem_info.rem_subnet_id = remote_proc_info->pm_port_info.subnet_id;
ep->rem_info.rem_mtu = remote_proc_info->pm_port_info.mtu;
opal_output(-1, "Got remote LID, subnet, MTU: %d, 0x%" PRIx64 ", %d",
opal_output(-1, "Got remote LID, subnet, MTU: %d, 0x%" PRIx64 ", %d",
ep->rem_info.rem_lid,
ep->rem_info.rem_subnet_id,
ep->rem_info.rem_mtu);
@ -508,7 +508,7 @@ static void cts_sent(mca_btl_base_module_t* btl,
/*
* Send CTS control fragment
*/
void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
{
mca_btl_openib_send_control_frag_t *sc_frag;
mca_btl_base_descriptor_t *base_des;
@ -665,8 +665,8 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
while(master && !opal_list_is_empty(&endpoint->ib_addr->pending_ep)) {
ep_item = opal_list_remove_first(&endpoint->ib_addr->pending_ep);
ep = (mca_btl_openib_endpoint_t *)ep_item;
if (OMPI_SUCCESS !=
ompi_btl_openib_connect_base_start(endpoint->endpoint_local_cpc,
if (OMPI_SUCCESS !=
ompi_btl_openib_connect_base_start(endpoint->endpoint_local_cpc,
ep)) {
BTL_ERROR(("Failed to connect pending endpoint\n"));
}
@ -874,7 +874,7 @@ static int mca_btl_openib_endpoint_send_eager_rdma(
rdma_hdr->control.type = MCA_BTL_OPENIB_CONTROL_RDMA;
rdma_hdr->rkey = endpoint->eager_rdma_local.reg->mr->rkey;
rdma_hdr->rdma_start.lval = ompi_ptr_ptol(endpoint->eager_rdma_local.base.pval);
BTL_VERBOSE(("sending rkey %" PRIu32 ", rdma_start.lval %" PRIx64
BTL_VERBOSE(("sending rkey %" PRIu32 ", rdma_start.lval %" PRIx64
", pval %p, ival %" PRIu32 " type %d and sizeof(rdma_hdr) %d\n",
rdma_hdr->rkey,
rdma_hdr->rdma_start.lval,

Просмотреть файл

@ -208,7 +208,7 @@ struct mca_btl_base_endpoint_t {
/** list of pending rget ops */
opal_list_t pending_get_frags;
/** list of pending rput ops */
opal_list_t pending_put_frags;
opal_list_t pending_put_frags;
/** number of available get tokens */
int32_t get_tokens;
@ -503,8 +503,8 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
#if OPAL_ENABLE_DEBUG
do {
ftr->seq = ep->eager_rdma_remote.seq;
} while (!OPAL_ATOMIC_CMPSET_32((int32_t*) &ep->eager_rdma_remote.seq,
(int32_t) ftr->seq,
} while (!OPAL_ATOMIC_CMPSET_32((int32_t*) &ep->eager_rdma_remote.seq,
(int32_t) ftr->seq,
(int32_t) (ftr->seq+1)));
#endif
if(ep->nbo)

Просмотреть файл

@ -56,7 +56,7 @@ static void dump_local_rdma_frags(mca_btl_openib_endpoint_t * endpoint);
* @param qp Queue pair that had the error
* @param remote_proc Pointer to process that had the error
* @param endpoint Pointer to endpoint that had the error
*/
*/
void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
mca_btl_base_descriptor_t *des,
int qp,
@ -84,7 +84,7 @@ void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
* way to figure out what type of message created the error because
* we need the information in the wc->imm_data field which does not
* exist when we have an error. So, nothing to do here but return. */
if ((openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV) &&
if ((openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV) &&
!BTL_OPENIB_QP_TYPE_PP(qp)) {
opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
"SRQ RECV type=%d", openib_frag_type(des));
@ -108,7 +108,7 @@ void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
* B. It was some type of openib specific control message.
* Therefore, just drop the fragments and call up into the PML to
* disable this endpoint for future communication. */
if (((openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV) &&
if (((openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV) &&
(BTL_OPENIB_QP_TYPE_PP(qp))) ||
(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_CONTROL) ||
(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_EAGER_RDMA)) {
@ -244,7 +244,7 @@ void mca_btl_openib_handle_btl_error(mca_btl_openib_module_t* openib_btl) {
NULL, btlname);
if (NULL != btlname) free(btlname);
/* Now send out messages to all endpoints that we are disconnecting.
/* Now send out messages to all endpoints that we are disconnecting.
* Only do this to endpoints that are connected. Otherwise, the
* remote side does not yet have the information on this endpoint. */
for (i = 0; i < opal_pointer_array_get_size(openib_btl->device->endpoints); i++) {
@ -372,7 +372,7 @@ void btl_openib_handle_failover_control_messages(mca_btl_openib_control_header_t
opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
"IB: rank=%d, control message (remote=%d), "
"moved local head by one (new=%d)",
ORTE_PROC_MY_NAME->vpid,
ORTE_PROC_MY_NAME->vpid,
newep->endpoint_proc->proc_ompi->proc_name.vpid,
newep->eager_rdma_local.head);
} else {
@ -399,9 +399,9 @@ void btl_openib_handle_failover_control_messages(mca_btl_openib_control_header_t
* and call the callback function with OMPI_ERROR. It walks through
* each qp with each priority and looks for both no_credits_pending_frags
* and no_wqe_pending_frags. It then looks for any pending_lazy_frags,
* pending_put_frags, and pending_get_frags. This function is only
* pending_put_frags, and pending_get_frags. This function is only
* called when running with failover support enabled. Note that
* the errout parameter allows the function to also be used as a
* the errout parameter allows the function to also be used as a
* debugging tool to see if there are any fragments on any of the
* queues.
* @param ep Pointer to endpoint that had error
@ -753,7 +753,7 @@ void mca_btl_openib_dump_all_local_rdma_frags(mca_btl_openib_device_t *device) {
/**
* This function is a debugging tool. If you notice a hang, you can
* call this function from a debugger and see if there are any
* call this function from a debugger and see if there are any
* messages stuck in any of the queues. If you call it with
* errout=true, then it will error them out. Otherwise, it will
* just print out the size of the queues with data in them.
@ -769,7 +769,7 @@ void mca_btl_openib_dump_all_internal_queues(bool errout) {
btl = mca_btl_openib_component.openib_btls[i];
module = &btl->super;
num_eps = opal_pointer_array_get_size(btl->device->endpoints);
/* Now, find the endpoint associated with it */
for (j = 0; j < num_eps; j++) {
ep = (mca_btl_openib_endpoint_t*)

Просмотреть файл

@ -3,9 +3,9 @@
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -174,7 +174,7 @@ static int service_pipe_cmd_add_fd(bool use_libevent, cmd_t *cmd)
if (use_libevent) {
/* Make an event for this fd */
ri->ri_event_used = true;
opal_event_set(opal_event_base, &ri->ri_event, ri->ri_fd,
opal_event_set(opal_event_base, &ri->ri_event, ri->ri_fd,
ri->ri_flags | OPAL_EV_PERSIST, service_fd_callback,
ri);
opal_event_add(&ri->ri_event, 0);
@ -248,13 +248,13 @@ static int service_pipe_cmd_remove_fd(cmd_t *cmd)
}
}
}
/* Let the caller know that we have stopped monitoring
this fd (if they care) */
if (NULL != cmd->pc_fn.event) {
cmd->pc_fn.event(cmd->pc_fd, 0, cmd->pc_context);
}
/* Remove this item from the list of registered items and
release it */
opal_list_remove_item(&registered_items, item);
@ -344,7 +344,7 @@ static bool service_pipe_cmd(void)
--waiting_for_ack_from_main_thread;
}
break;
default:
OPAL_OUTPUT((-1, "fd service thread: unknown pipe command!"));
break;
@ -390,7 +390,7 @@ static void *service_thread_start(void *context)
break;
}
OPAL_OUTPUT((-1, "fd service thread: back from pipe command"));
}
}
/* Go through all the registered events and see who had
activity */
@ -414,7 +414,7 @@ static void *service_thread_start(void *context)
/* If either was ready, invoke the callback */
if (0 != flags) {
OPAL_OUTPUT((-1, "fd service thread: invoking callback for registered fd %d", ri->ri_fd));
ri->ri_callback.event(ri->ri_fd, flags,
ri->ri_callback.event(ri->ri_fd, flags,
ri->ri_context);
OPAL_OUTPUT((-1, "fd service thread: back from callback for registered fd %d", ri->ri_fd));
}
@ -443,7 +443,7 @@ static void main_thread_event_callback(int fd, short event, void *context)
break;
default:
OPAL_OUTPUT((-1, "fd main thread: unknown pipe command: %d",
OPAL_OUTPUT((-1, "fd main thread: unknown pipe command: %d",
cmd.pc_cmd));
break;
}
@ -481,12 +481,12 @@ int ompi_btl_openib_fd_init(void)
/* Create a libevent event that is used in the main thread
to watch its pipe */
opal_event_set(opal_event_base, &main_thread_event, pipe_to_main_thread[0],
OPAL_EV_READ | OPAL_EV_PERSIST,
OPAL_EV_READ | OPAL_EV_PERSIST,
main_thread_event_callback, NULL);
opal_event_add(&main_thread_event, 0);
/* Start the service thread */
if (0 != pthread_create(&thread, NULL, service_thread_start,
if (0 != pthread_create(&thread, NULL, service_thread_start,
NULL)) {
int errno_save = errno;
opal_event_del(&main_thread_event);
@ -509,7 +509,7 @@ int ompi_btl_openib_fd_init(void)
* Start monitoring an fd
* Called by main or service thread; callback will be in service thread
*/
int ompi_btl_openib_fd_monitor(int fd, int flags,
int ompi_btl_openib_fd_monitor(int fd, int flags,
ompi_btl_openib_fd_event_callback_fn_t *callback,
void *context)
{
@ -542,7 +542,7 @@ int ompi_btl_openib_fd_monitor(int fd, int flags,
* Stop monitoring an fd
* Called by main or service thread; callback will be in service thread
*/
int ompi_btl_openib_fd_unmonitor(int fd,
int ompi_btl_openib_fd_unmonitor(int fd,
ompi_btl_openib_fd_event_callback_fn_t *callback,
void *context)
{
@ -552,7 +552,7 @@ int ompi_btl_openib_fd_unmonitor(int fd,
if (fd < 0) {
return OMPI_ERR_BAD_PARAM;
}
cmd.pc_cmd = CMD_REMOVE_FD;
cmd.pc_fd = fd;
cmd.pc_flags = 0;
@ -630,7 +630,7 @@ ompi_btl_openib_fd_main_thread_drain(void)
int nfds, ret;
fd_set rfds;
struct timeval tv;
while (1) {
FD_ZERO(&rfds);
FD_SET(pipe_to_main_thread[0], &rfds);
@ -665,10 +665,10 @@ int ompi_btl_openib_fd_finalize(void)
memset(&cmd, 0, cmd_size);
cmd.pc_cmd = CMD_TIME_TO_QUIT;
opal_fd_write(pipe_to_service_thread[1], cmd_size, &cmd);
pthread_join(thread, NULL);
opal_atomic_rmb();
opal_event_del(&main_thread_event);
close(pipe_to_service_thread[0]);

Просмотреть файл

@ -3,9 +3,9 @@
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -19,7 +19,7 @@ BEGIN_C_DECLS
/**
* Typedef for fd callback function
*/
typedef void *(ompi_btl_openib_fd_event_callback_fn_t)(int fd, int flags,
typedef void *(ompi_btl_openib_fd_event_callback_fn_t)(int fd, int flags,
void *context);
/**
@ -37,7 +37,7 @@ int ompi_btl_openib_fd_init(void);
* Start monitoring an fd.
* Called by main or service thread; callback will be in service thread.
*/
int ompi_btl_openib_fd_monitor(int fd, int flags,
int ompi_btl_openib_fd_monitor(int fd, int flags,
ompi_btl_openib_fd_event_callback_fn_t *callback,
void *context);
@ -45,7 +45,7 @@ int ompi_btl_openib_fd_monitor(int fd, int flags,
* Stop monitoring an fd.
* Called by main or service thread; callback will be in service thread.
*/
int ompi_btl_openib_fd_unmonitor(int fd,
int ompi_btl_openib_fd_unmonitor(int fd,
ompi_btl_openib_fd_event_callback_fn_t *callback,
void *context);

Просмотреть файл

@ -110,7 +110,7 @@ int ompi_btl_openib_ini_init(void)
#ifndef __WINDOWS__
char separator = ':';
#else
/* ':' is part of the path on Windows,
/* ':' is part of the path on Windows,
so use ';' instead. */
char separator = ';';
#endif
@ -414,7 +414,7 @@ static int parse_line(parsed_section_values_t *sv)
else if (0 == strcasecmp(key_buffer, "rdmacm_reject_causes_connect_error")) {
/* Single value */
sv->values.rdmacm_reject_causes_connect_error =
sv->values.rdmacm_reject_causes_connect_error =
(bool) ompi_btl_openib_ini_intify(value);
sv->values.rdmacm_reject_causes_connect_error_set = true;
}
@ -558,7 +558,7 @@ static int save_section(parsed_section_values_t *s)
}
if (NULL != s->values.receive_queues) {
h->values.receive_queues =
h->values.receive_queues =
strdup(s->values.receive_queues);
}
@ -568,9 +568,9 @@ static int save_section(parsed_section_values_t *s)
}
if (s->values.rdmacm_reject_causes_connect_error_set) {
h->values.rdmacm_reject_causes_connect_error =
h->values.rdmacm_reject_causes_connect_error =
s->values.rdmacm_reject_causes_connect_error;
h->values.rdmacm_reject_causes_connect_error_set =
h->values.rdmacm_reject_causes_connect_error_set =
true;
}

Просмотреть файл

@ -31,9 +31,9 @@
#include "btl_openib_ip.h"
#if OMPI_HAVE_RDMACM
/*
/*
* The cruft below maintains the linked list of rdma ipv4 addresses and their
* associated rdma device names and device port numbers.
* associated rdma device names and device port numbers.
*/
struct rdma_addr_list {
opal_list_item_t super;
@ -45,7 +45,7 @@ struct rdma_addr_list {
};
typedef struct rdma_addr_list rdma_addr_list_t;
static OBJ_CLASS_INSTANCE(rdma_addr_list_t, opal_list_item_t,
static OBJ_CLASS_INSTANCE(rdma_addr_list_t, opal_list_item_t,
NULL, NULL);
static opal_list_t *myaddrs = NULL;
@ -54,7 +54,7 @@ static char *stringify(uint32_t addr)
{
static char line[64];
memset(line, 0, sizeof(line));
snprintf(line, sizeof(line) - 1, "%d.%d.%d.%d (0x%x)",
snprintf(line, sizeof(line) - 1, "%d.%d.%d.%d (0x%x)",
#if defined(WORDS_BIGENDIAN)
(addr >> 24),
(addr >> 16) & 0xff,
@ -119,7 +119,7 @@ uint64_t mca_btl_openib_get_ip_subnet_id(struct ibv_device *ib_dev,
* mismatch if IP Aliases are being used. For more information on
* this, please read comment above mca_btl_openib_get_ip_subnet_id.
*/
uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs,
uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs,
uint8_t port)
{
opal_list_item_t *item;
@ -135,7 +135,7 @@ uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs,
item != opal_list_get_end(myaddrs);
item = opal_list_get_next(item)) {
struct rdma_addr_list *addr = (struct rdma_addr_list *)item;
if (!strcmp(addr->dev_name, verbs->device->name) &&
if (!strcmp(addr->dev_name, verbs->device->name) &&
port == addr->dev_port) {
BTL_VERBOSE(("FOUND: %s:%d is %s",
ibv_get_device_name(verbs->device), port,
@ -219,7 +219,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
subnet = ntohl(ipaddr->sin_addr.s_addr) & ~(all >> netmask);
opal_argv_free(temp);
if (subnet == list_subnet) {
if (subnet == list_subnet) {
return 0;
}
}
@ -261,7 +261,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
subnet = ntohl(ipaddr->sin_addr.s_addr) & ~(all >> netmask);
opal_argv_free(temp);
if (subnet == list_subnet) {
if (subnet == list_subnet) {
return 1;
}
}
@ -282,7 +282,7 @@ static int add_rdma_addr(struct sockaddr *ipaddr, uint32_t netmask)
/* Ensure that this IP address is not in 127.0.0.1/8. If it is,
skip it because we never want loopback addresses to be
considered RDMA devices that remote peers can use to connect
to.
to.
This check is necessary because of a change that almost went
into RDMA CM in OFED 1.5.1. We asked for a delay so that we
@ -356,11 +356,11 @@ static int add_rdma_addr(struct sockaddr *ipaddr, uint32_t netmask)
myaddr->addr = sinp->sin_addr.s_addr;
myaddr->subnet = ntohl(myaddr->addr) & ~(all >> netmask);
inet_ntop(sinp->sin_family, &sinp->sin_addr,
inet_ntop(sinp->sin_family, &sinp->sin_addr,
myaddr->addr_str, sizeof(myaddr->addr_str));
memcpy(myaddr->dev_name, cm_id->verbs->device->name, IBV_SYSFS_NAME_MAX);
myaddr->dev_port = cm_id->port_num;
BTL_VERBOSE(("Adding addr %s (0x%x) subnet 0x%x as %s:%d",
BTL_VERBOSE(("Adding addr %s (0x%x) subnet 0x%x as %s:%d",
myaddr->addr_str, myaddr->addr, myaddr->subnet,
myaddr->dev_name, myaddr->dev_port));
@ -400,7 +400,7 @@ int mca_btl_openib_build_rdma_addr_list(void)
}
return rc;
}
void mca_btl_openib_free_rdma_addr_list(void)
{
opal_list_item_t *item, *next;
@ -419,27 +419,27 @@ void mca_btl_openib_free_rdma_addr_list(void)
}
}
#else
#else
/* !OMPI_HAVE_RDMACM case */
uint64_t mca_btl_openib_get_ip_subnet_id(struct ibv_device *ib_dev,
uint8_t port)
uint8_t port)
{
return 0;
}
uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs,
uint8_t port)
uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs,
uint8_t port)
{
return 0;
}
int mca_btl_openib_build_rdma_addr_list(void)
int mca_btl_openib_build_rdma_addr_list(void)
{
return OMPI_SUCCESS;
}
void mca_btl_openib_free_rdma_addr_list(void)
void mca_btl_openib_free_rdma_addr_list(void)
{
}
#endif

Просмотреть файл

@ -32,7 +32,7 @@ extern uint64_t mca_btl_openib_get_ip_subnet_id(struct ibv_device *ib_dev,
* @param port (IN) physical port of the IBV device
* @return IPv4 Address
*/
extern uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs,
extern uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs,
uint8_t port);
/**

Просмотреть файл

@ -6,15 +6,15 @@
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -72,13 +72,13 @@ NAME_CHAR [A-Za-z0-9_\-\.\\\/]
<comment>[^*\n]* ; /* Eat up non '*'s */
<comment>"*"+[^*/\n]* ; /* Eat '*'s not followed by a '/' */
<comment>\n { ++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_NEWLINE; }
return BTL_OPENIB_INI_PARSE_NEWLINE; }
<comment>"*"+"/" { BEGIN(INITIAL); /* Done with block comment */
return BTL_OPENIB_INI_PARSE_NEWLINE; }
{WHITE}*\[{WHITE}* { BEGIN(section_name); }
<section_name>({NAME_CHAR}|{WHITE})*{NAME_CHAR}/{WHITE}*\] {
BEGIN(section_end);
BEGIN(section_end);
return BTL_OPENIB_INI_PARSE_SECTION; }
<section_name>\n { ++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_ERROR; }
@ -87,7 +87,7 @@ NAME_CHAR [A-Za-z0-9_\-\.\\\/]
++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_NEWLINE; }
{WHITE}*"="{WHITE}* { BEGIN(value);
{WHITE}*"="{WHITE}* { BEGIN(value);
return BTL_OPENIB_INI_PARSE_EQUAL; }
{WHITE}+ ; /* whitespace */
{CHAR}+ { return BTL_OPENIB_INI_PARSE_SINGLE_WORD; }
@ -95,7 +95,7 @@ NAME_CHAR [A-Za-z0-9_\-\.\\\/]
<value>{WHITE}*\n { BEGIN(INITIAL);
++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_NEWLINE; }
<value>[^\n]*[^\t \n]/[\t ]* {
<value>[^\n]*[^\t \n]/[\t ]* {
return BTL_OPENIB_INI_PARSE_VALUE; }
. { return BTL_OPENIB_INI_PARSE_ERROR; }
@ -107,10 +107,10 @@ NAME_CHAR [A-Za-z0-9_\-\.\\\/]
* This cleans up at the end of the parse (since, in this case, we
* always parse the entire file) and prevents a memory leak.
*/
static int finish_parsing(void)
static int finish_parsing(void)
{
if (NULL != YY_CURRENT_BUFFER) {
yy_delete_buffer(YY_CURRENT_BUFFER);
yy_delete_buffer(YY_CURRENT_BUFFER);
#if defined(YY_CURRENT_BUFFER_LVALUE)
YY_CURRENT_BUFFER_LVALUE = NULL;
#else

Просмотреть файл

@ -64,7 +64,7 @@ enum {
/*
* utility routine for string parameter registration
*/
static int reg_string(const char* param_name,
static int reg_string(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
const char* default_value, char **out_value,
@ -76,8 +76,8 @@ static int reg_string(const char* param_name,
param_name, param_desc, false, false,
default_value, &value);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_btl_openib_component.super.btl_version,
mca_base_param_reg_syn(index,
&mca_btl_openib_component.super.btl_version,
deprecated_param_name, true);
}
mca_base_param_lookup_string(index, &value);
@ -95,7 +95,7 @@ static int reg_string(const char* param_name,
/*
* utility routine for integer parameter registration
*/
static int reg_int(const char* param_name,
static int reg_int(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
int default_value, int *out_value, int flags)
@ -105,12 +105,12 @@ static int reg_int(const char* param_name,
param_name, param_desc, false, false,
default_value, NULL);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_btl_openib_component.super.btl_version,
mca_base_param_reg_syn(index,
&mca_btl_openib_component.super.btl_version,
deprecated_param_name, true);
}
mca_base_param_lookup_int(index, &value);
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == value) {
*out_value = value;
return OMPI_SUCCESS;
@ -193,7 +193,7 @@ int btl_openib_register_mca_params(void)
}
CHECK(reg_string("device_param_files", "hca_param_files",
"Colon-delimited list of INI-style files that contain device vendor/part-specific parameters (use semicolon for Windows)",
str, &mca_btl_openib_component.device_params_file_names,
str, &mca_btl_openib_component.device_params_file_names,
0));
free(str);
@ -264,11 +264,11 @@ int btl_openib_register_mca_params(void)
-1, &ival, REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
mca_btl_openib_component.ib_max_inline_data = (int32_t) ival;
CHECK(reg_string("pkey", "ib_pkey_val",
CHECK(reg_string("pkey", "ib_pkey_val",
"OpenFabrics partition key (pkey) value. "
"Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB partition key value (0x7fff)",
"0", &pkey, 0));
mca_btl_openib_component.ib_pkey_val =
mca_btl_openib_component.ib_pkey_val =
ompi_btl_openib_ini_intify(pkey) & MCA_BTL_IB_PKEY_MASK;
free(pkey);
@ -278,7 +278,7 @@ int btl_openib_register_mca_params(void)
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_psn = (uint32_t) ival;
CHECK(reg_int("ib_qp_ous_rd_atom", NULL,
CHECK(reg_int("ib_qp_ous_rd_atom", NULL,
"InfiniBand outstanding atomic reads "
"(must be >= 0)",
4, &ival, REGINT_GE_ZERO));
@ -402,7 +402,7 @@ int btl_openib_register_mca_params(void)
CHECK(reg_int("ib_path_record_service_level", NULL,
"Enable getting InfiniBand service level from PathRecord "
"(must be >= 0, 0 = disabled, positive = try to get the "
"service level from PathRecord)",
"service level from PathRecord)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_path_record_service_level = (uint32_t) ival;
#endif
@ -582,11 +582,11 @@ int btl_openib_register_mca_params(void)
CHECK(reg_string("receive_queues", NULL,
"Colon-delimited, comma-delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4",
default_qps, &mca_btl_openib_component.receive_queues,
default_qps, &mca_btl_openib_component.receive_queues,
0));
mca_btl_openib_component.receive_queues_source =
(0 == strcmp(default_qps,
mca_btl_openib_component.receive_queues)) ?
mca_btl_openib_component.receive_queues_source =
(0 == strcmp(default_qps,
mca_btl_openib_component.receive_queues)) ?
BTL_OPENIB_RQ_SOURCE_DEFAULT : BTL_OPENIB_RQ_SOURCE_MCA;
CHECK(reg_string("if_include", NULL,

Просмотреть файл

@ -172,7 +172,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
BTL_VERBOSE(("unpack: %d btls", module_proc->proc_port_count));
if (module_proc->proc_port_count > 0) {
module_proc->proc_ports = (mca_btl_openib_proc_modex_t *)
malloc(sizeof(mca_btl_openib_proc_modex_t) *
malloc(sizeof(mca_btl_openib_proc_modex_t) *
module_proc->proc_port_count);
} else {
module_proc->proc_ports = NULL;
@ -194,7 +194,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
/* Unpack the number of CPCs that follow */
unpack8(&offset, &(module_proc->proc_ports[i].pm_cpc_data_count));
BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)",
i, module_proc->proc_ports[i].pm_cpc_data_count,
i, module_proc->proc_ports[i].pm_cpc_data_count,
(int)(offset-((char*)message))));
module_proc->proc_ports[i].pm_cpc_data = (ompi_btl_openib_connect_base_module_data_t *)
calloc(module_proc->proc_ports[i].pm_cpc_data_count,
@ -211,15 +211,15 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
unpack8(&offset, &u8);
BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)",
i, j, u8, (int)(offset-(char*)message)));
cpcd->cbm_component =
cpcd->cbm_component =
ompi_btl_openib_connect_base_get_cpc_byindex(u8);
BTL_VERBOSE(("unpacked btl %d: cpc %d: component %s",
i, j, cpcd->cbm_component->cbc_name));
unpack8(&offset, &cpcd->cbm_priority);
unpack8(&offset, &cpcd->cbm_modex_message_len);
BTL_VERBOSE(("unpacked btl %d: cpc %d: priority %d, msg len %d (offset now %d)",
i, j, cpcd->cbm_priority,
i, j, cpcd->cbm_priority,
cpcd->cbm_modex_message_len,
(int)(offset-(char*)message)));
if (cpcd->cbm_modex_message_len > 0) {
@ -228,7 +228,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
BTL_ERROR(("Failed to malloc"));
return NULL;
}
memcpy(cpcd->cbm_modex_message, offset,
memcpy(cpcd->cbm_modex_message, offset,
cpcd->cbm_modex_message_len);
offset += cpcd->cbm_modex_message_len;
BTL_VERBOSE(("unpacked btl %d: cpc %d: blob unpacked %d %x (offset now %d)",
@ -244,7 +244,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
module_proc->proc_endpoints = NULL;
} else {
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
malloc(module_proc->proc_port_count *
malloc(module_proc->proc_port_count *
sizeof(mca_btl_base_endpoint_t*));
}
if (NULL == module_proc->proc_endpoints) {

Просмотреть файл

@ -47,7 +47,7 @@ typedef struct mca_btl_openib_proc_modex_t {
/** Array of the peer's CPCs available on this port */
ompi_btl_openib_connect_base_module_data_t *pm_cpc_data;
/** Length of the pm_cpc_data array */
uint8_t pm_cpc_data_count;
} mca_btl_openib_proc_modex_t;

Просмотреть файл

@ -6,7 +6,7 @@
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
@ -14,9 +14,9 @@
# Copyright (c) 2008 Mellanox Technologies. All rights reserved.
# Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved.
# $COPYRIGHT$
#
#
# Additional copyrights may follow
#
#
# $HEADER$
#
@ -29,7 +29,7 @@ AC_DEFUN([MCA_ompi_btl_openib_POST_CONFIG], [
])
# MCA_btl_openib_CONFIG([action-if-can-compile],
# MCA_btl_openib_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_ompi_btl_openib_CONFIG],[

Просмотреть файл

@ -3,9 +3,9 @@
* Copyright (c) 2009 Mellanox Technologies. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -74,13 +74,13 @@ int ompi_btl_openib_connect_base_get_cpc_index
ompi_btl_openib_connect_base_component_t *
ompi_btl_openib_connect_base_get_cpc_byindex(uint8_t index);
/*
/*
* Allocate a CTS frag
*/
int ompi_btl_openib_connect_base_alloc_cts(
struct mca_btl_base_endpoint_t *endpoint);
/*
/*
* Free a CTS frag
*/
int ompi_btl_openib_connect_base_free_cts(

Просмотреть файл

@ -3,9 +3,9 @@
* Copyright (c) 2007 Mellanox Technologies, Inc. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
#include "ompi_config.h"
@ -88,7 +88,7 @@ int ompi_btl_openib_connect_base_register(void)
all_cpc_names);
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"cpc_include", string, false, false,
"cpc_include", string, false, false,
NULL, &cpc_include);
free(string);
@ -97,7 +97,7 @@ int ompi_btl_openib_connect_base_register(void)
all_cpc_names);
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"cpc_exclude", string, false, false,
"cpc_exclude", string, false, false,
NULL, &cpc_exclude);
free(string);
@ -112,7 +112,7 @@ int ompi_btl_openib_connect_base_register(void)
temp = opal_argv_split(cpc_include, ',');
for (save = j = 0; NULL != temp[j]; ++j) {
for (i = 0; NULL != all[i]; ++i) {
if (0 == strcmp(temp[j], all[i]->cbc_name)) {
if (0 == strcmp(temp[j], all[i]->cbc_name)) {
opal_output(-1, "include: saving %s", all[i]->cbc_name);
available[save++] = all[i];
++num_available;
@ -123,7 +123,7 @@ int ompi_btl_openib_connect_base_register(void)
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
"cpc name not found", true,
"include", orte_process_info.nodename,
"include", cpc_include, temp[j],
"include", cpc_include, temp[j],
all_cpc_names);
opal_argv_free(temp);
free(all_cpc_names);
@ -141,7 +141,7 @@ int ompi_btl_openib_connect_base_register(void)
/* First: error check -- ensure that all the names are valid */
for (j = 0; NULL != temp[j]; ++j) {
for (i = 0; NULL != all[i]; ++i) {
if (0 == strcmp(temp[j], all[i]->cbc_name)) {
if (0 == strcmp(temp[j], all[i]->cbc_name)) {
break;
}
}
@ -149,7 +149,7 @@ int ompi_btl_openib_connect_base_register(void)
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
"cpc name not found", true,
"exclude", orte_process_info.nodename,
"exclude", cpc_exclude, temp[j],
"exclude", cpc_exclude, temp[j],
all_cpc_names);
opal_argv_free(temp);
free(all_cpc_names);
@ -171,13 +171,13 @@ int ompi_btl_openib_connect_base_register(void)
}
}
opal_argv_free(temp);
}
}
/* If there's no include/exclude list, copy all[] into available[] */
else {
opal_output(-1, "no include or exclude: saving all");
memcpy(available, all, sizeof(all));
num_available = (sizeof(all) /
num_available = (sizeof(all) /
sizeof(ompi_btl_openib_connect_base_module_t *)) - 1;
}
@ -241,7 +241,7 @@ int ompi_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *
int i, rc, cpc_index, len;
ompi_btl_openib_connect_base_module_t **cpcs;
cpcs = (ompi_btl_openib_connect_base_module_t **) calloc(num_available,
cpcs = (ompi_btl_openib_connect_base_module_t **) calloc(num_available,
sizeof(ompi_btl_openib_connect_base_module_t *));
if (NULL == cpcs) {
return OMPI_ERR_OUT_OF_RESOURCE;
@ -311,7 +311,7 @@ int ompi_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *
return OMPI_SUCCESS;
}
/*
/*
* This function is invoked when determining whether we have a CPC in
* common with a specific remote port. We already know that the
* subnet ID is the same between a specific local port and the target
@ -398,7 +398,7 @@ int ompi_btl_openib_connect_base_get_cpc_index(ompi_btl_openib_connect_base_comp
ompi_btl_openib_connect_base_component_t *
ompi_btl_openib_connect_base_get_cpc_byindex(uint8_t index)
{
return (index >= (sizeof(all) /
return (index >= (sizeof(all) /
sizeof(ompi_btl_openib_connect_base_module_t *))) ?
NULL : all[index];
}
@ -421,8 +421,8 @@ int ompi_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint)
return OMPI_ERR_OUT_OF_RESOURCE;
}
endpoint->endpoint_cts_mr =
ibv_reg_mr(endpoint->endpoint_btl->device->ib_pd,
endpoint->endpoint_cts_mr =
ibv_reg_mr(endpoint->endpoint_btl->device->ib_pd,
fli->ptr, length,
IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ);
@ -438,14 +438,14 @@ int ompi_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint)
from underneath us. */
/* Copy the lkey where it needs to go */
endpoint->endpoint_cts_frag.super.sg_entry.lkey =
endpoint->endpoint_cts_frag.super.super.segment.seg_key.key32[0] =
endpoint->endpoint_cts_frag.super.sg_entry.lkey =
endpoint->endpoint_cts_frag.super.super.segment.seg_key.key32[0] =
endpoint->endpoint_cts_mr->lkey;
endpoint->endpoint_cts_frag.super.sg_entry.length = length;
/* Construct the rest of the recv_frag_t */
OBJ_CONSTRUCT(&(endpoint->endpoint_cts_frag), mca_btl_openib_recv_frag_t);
endpoint->endpoint_cts_frag.super.super.base.order =
endpoint->endpoint_cts_frag.super.super.base.order =
mca_btl_openib_component.credits_qp;
endpoint->endpoint_cts_frag.super.endpoint = endpoint;
OPAL_OUTPUT((-1, "Got a CTS frag for peer %s, addr %p, length %d, lkey %d",

Просмотреть файл

@ -2,9 +2,9 @@
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -16,7 +16,7 @@
static void empty_component_register(void);
static int empty_component_init(void);
static int empty_component_query(mca_btl_openib_module_t *btl,
static int empty_component_query(mca_btl_openib_module_t *btl,
ompi_btl_openib_connect_base_module_t **cpc);
ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_empty = {
@ -38,7 +38,7 @@ static int empty_component_init(void)
return OMPI_ERR_NOT_SUPPORTED;
}
static int empty_component_query(mca_btl_openib_module_t *btl,
static int empty_component_query(mca_btl_openib_module_t *btl,
ompi_btl_openib_connect_base_module_t **cpc)
{
/* Never let this CPC run */

Просмотреть файл

@ -2,9 +2,9 @@
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/

Просмотреть файл

@ -4,9 +4,9 @@
* Copyright (c) 2009 IBM Corporation. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -14,7 +14,7 @@
* TO-DO:
*
* - audit control values passed to req_send()
* - More show_help() throughout
* - More show_help() throughout
* - error handling in case of broken connection is not good; need to
* notify btl module safely
*/
@ -91,7 +91,7 @@
* tell IBCM (among other things) when the first message arrives on
* a QP when the RTU has not yet been received. This can happen, of
* course, since IBCM traffic is UD.
* - Also, note that IBCM "listener" IDs are per DEVICE, not per port.
* - Also, note that IBCM "listener" IDs are per DEVICE, not per port.
* - CM ID's are persistent throughout the life of a QP. If you
* destroy a CM ID (ib_cm_destroy_id), the IBCM system will tear
* down the connection. So the CM ID you get when receiving a
@ -179,7 +179,7 @@
*
* 1. The "wrong" process will send a single IBCM connection request
* to its peer on a bogus QP that was created just for this
* request.
* request.
* 2. The receiver will get the request, detect that it came
* in from the "wrong" direction, and reject it (IBCM has an
* explicit provision for rejecting incoming connections).
@ -366,7 +366,7 @@ typedef struct {
static void ibcm_listen_cm_id_constructor(ibcm_listen_cm_id_t *h);
static void ibcm_listen_cm_id_destructor(ibcm_listen_cm_id_t *h);
static OBJ_CLASS_INSTANCE(ibcm_listen_cm_id_t, opal_list_item_t,
static OBJ_CLASS_INSTANCE(ibcm_listen_cm_id_t, opal_list_item_t,
ibcm_listen_cm_id_constructor,
ibcm_listen_cm_id_destructor);
@ -452,7 +452,7 @@ typedef struct {
ibcm_module_t *ibcm_module;
} ibcm_module_list_item_t;
static OBJ_CLASS_INSTANCE(ibcm_module_list_item_t, opal_list_item_t,
static OBJ_CLASS_INSTANCE(ibcm_module_list_item_t, opal_list_item_t,
NULL, NULL);
/*
@ -506,7 +506,7 @@ typedef struct {
/*--------------------------------------------------------------------*/
static void ibcm_component_register(void);
static int ibcm_component_query(mca_btl_openib_module_t *btl,
static int ibcm_component_query(mca_btl_openib_module_t *btl,
ompi_btl_openib_connect_base_module_t **cpc);
static int ibcm_component_finalize(void);
@ -544,13 +544,13 @@ ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_ibcm = {
#define ENABLE_TIMERS (OPAL_ENABLE_DEBUG && 0)
#if ENABLE_TIMERS
#if ENABLE_TIMERS
#include MCA_timer_IMPLEMENTATION_HEADER
enum {
QUERY,
START_CONNECT,
QP_TO_RTR,
QP_TO_RTR,
QP_TO_RTS,
REQUEST_RECEIVED,
REPLY_RECEIVED,
@ -616,7 +616,7 @@ static void ibcm_component_register(void)
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_ibcm_gid_index",
"GID table index to use to obtain each port's GUID",
false, false, ibcm_gid_table_index,
false, false, ibcm_gid_table_index,
&ibcm_gid_table_index);
if (ibcm_gid_table_index < 0) {
ibcm_gid_table_index = 0;
@ -625,15 +625,15 @@ static void ibcm_component_register(void)
/*--------------------------------------------------------------------*/
/* The IB_CM_ASSIGN_SERVICE_ID value passed to ib_cm_listen function asks,
/* The IB_CM_ASSIGN_SERVICE_ID value passed to ib_cm_listen function asks,
* from IBCM , to assign service_id.
* The value was taken from IBCM kernel level
* The value was taken from IBCM kernel level
*/
#ifndef IB_CM_ASSIGN_SERVICE_ID
#define IB_CM_ASSIGN_SERVICE_ID hton64(0x0200000000000000ULL)
#endif
static int ibcm_component_query(mca_btl_openib_module_t *btl,
static int ibcm_component_query(mca_btl_openib_module_t *btl,
ompi_btl_openib_connect_base_module_t **cpc)
{
int rc;
@ -760,11 +760,11 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
goto error;
}
OPAL_OUTPUT((-1, "opened ibcm device 0x%" PRIx64 " (%s:%d)",
(uint64_t) cmh->cm_device,
(uint64_t) cmh->cm_device,
ibv_get_device_name(cmh->ib_context->device),
openib_btl->port_num));
if (0 != (rc = ib_cm_create_id(cmh->cm_device,
if (0 != (rc = ib_cm_create_id(cmh->cm_device,
&cmh->listen_cm_id, NULL))) {
/* Same rationale as above */
OBJ_RELEASE(cmh);
@ -774,12 +774,12 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
}
if (0 != (rc = ib_cm_listen(cmh->listen_cm_id, IB_CM_ASSIGN_SERVICE_ID, 0))) {
/* Same rationale as above */
OBJ_RELEASE(cmh);
BTL_ERROR(("failed to ib_cm_listen : rc=%d, errno=%d", rc, errno));
rc = OMPI_ERR_NOT_SUPPORTED;
goto error;
}
/* Same rationale as above */
OBJ_RELEASE(cmh);
BTL_ERROR(("failed to ib_cm_listen : rc=%d, errno=%d", rc, errno));
rc = OMPI_ERR_NOT_SUPPORTED;
goto error;
}
if (0 != (rc = ib_cm_attr_id(cmh->listen_cm_id, &(cmh->param)))) {
OBJ_RELEASE(cmh);
@ -822,7 +822,7 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
rc = OMPI_ERR_UNREACH;
goto error;
}
rc = ibv_query_gid(btl->device->ib_dev_context, btl->port_num, ibcm_gid_table_index,
rc = ibv_query_gid(btl->device->ib_dev_context, btl->port_num, ibcm_gid_table_index,
&gid);
if (0 != rc) {
BTL_ERROR(("system error (ibv_query_gid failed)"));
@ -863,8 +863,8 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
btl->port_num));
} else {
BTL_VERBOSE(("unavailable for use on %s:%d; fatal error %d (%s)",
ibv_get_device_name(btl->device->ib_dev),
btl->port_num, rc,
ibv_get_device_name(btl->device->ib_dev),
btl->port_num, rc,
opal_strerror(rc)));
}
return rc;
@ -895,8 +895,8 @@ static uint32_t max_inline_size(int qp, mca_btl_openib_device_t *device)
* Create the local side of one qp. The remote side will be connected
* later.
*/
static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
struct ibv_srq *srq, uint32_t max_recv_wr,
static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
struct ibv_srq *srq, uint32_t max_recv_wr,
uint32_t max_send_wr)
{
mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
@ -910,7 +910,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
init_attr.send_cq = openib_btl->device->ib_cq[BTL_OPENIB_LP_CQ];
init_attr.recv_cq = openib_btl->device->ib_cq[qp_cq_prio(qp)];
init_attr.srq = srq;
init_attr.cap.max_inline_data = req_inline =
init_attr.cap.max_inline_data = req_inline =
max_inline_size(qp, openib_btl->device);
init_attr.cap.max_send_sge = 1;
init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
@ -922,10 +922,10 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
}
init_attr.cap.max_send_wr = max_send_wr;
my_qp = ibv_create_qp(openib_btl->device->ib_pd, &init_attr);
if (NULL == my_qp) {
BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
return OMPI_ERROR;
my_qp = ibv_create_qp(openib_btl->device->ib_pd, &init_attr);
if (NULL == my_qp) {
BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
return OMPI_ERROR;
}
endpoint->qps[qp].qp->lcl_qp = my_qp;
if (init_attr.cap.max_inline_data < req_inline) {
@ -971,7 +971,7 @@ static int qp_create_all(mca_btl_base_endpoint_t* endpoint,
pp_qp_num = 1;
}
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
struct ibv_srq *srq = NULL;
uint32_t max_recv_wr, max_send_wr;
int32_t rd_rsv, rd_num_credits;
@ -985,7 +985,7 @@ static int qp_create_all(mca_btl_base_endpoint_t* endpoint,
}
if (BTL_OPENIB_QP_TYPE_PP(qp)) {
max_recv_wr = mca_btl_openib_component.qp_infos[qp].rd_num +
max_recv_wr = mca_btl_openib_component.qp_infos[qp].rd_num +
rd_rsv;
max_send_wr = mca_btl_openib_component.qp_infos[qp].rd_num +
rd_num_credits;
@ -1019,14 +1019,14 @@ static int fill_path_record(ibcm_module_t *m,
mca_btl_base_endpoint_t *endpoint,
struct ibv_sa_path_rec *path_rec)
{
modex_msg_t *remote_msg =
modex_msg_t *remote_msg =
(modex_msg_t*) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
modex_msg_t *local_msg =
modex_msg_t *local_msg =
(modex_msg_t*) m->cpc.data.cbm_modex_message;
/* Global attributes */
path_rec->dgid.global.subnet_prefix =
path_rec->sgid.global.subnet_prefix =
path_rec->dgid.global.subnet_prefix =
path_rec->sgid.global.subnet_prefix =
hton64(m->btl->port_info.subnet_id);
path_rec->dgid.global.interface_id = hton64(remote_msg->mm_port_guid);
path_rec->sgid.global.interface_id = hton64(local_msg->mm_port_guid);
@ -1064,7 +1064,7 @@ static int fill_path_record(ibcm_module_t *m,
path_rec->pkey = mca_btl_openib_component.ib_pkey_val;
if (0 == path_rec->pkey) {
uint16_t pkey;
ibv_query_pkey(endpoint->endpoint_btl->device->ib_dev_context,
ibv_query_pkey(endpoint->endpoint_btl->device->ib_dev_context,
endpoint->endpoint_btl->port_num, 0, &pkey);
path_rec->pkey = ntohs(pkey);
}
@ -1103,10 +1103,10 @@ static int fill_path_record(ibcm_module_t *m,
BTL_VERBOSE(("Got src/dest subnet id: 0x%" PRIx64 " / 0x%" PRIx64,
path_rec->sgid.global.subnet_prefix,
path_rec->dgid.global.subnet_prefix));
BTL_VERBOSE(("Got src/dest interface id: 0x%" PRIx64 " / 0x%" PRIx64,
BTL_VERBOSE(("Got src/dest interface id: 0x%" PRIx64 " / 0x%" PRIx64,
path_rec->sgid.global.interface_id,
path_rec->dgid.global.interface_id));
BTL_VERBOSE(("Got src/dest lid: 0x%x / 0x%x",
BTL_VERBOSE(("Got src/dest lid: 0x%x / 0x%x",
path_rec->slid, path_rec->dlid));
BTL_VERBOSE(("Got raw_traffic: %d", path_rec->raw_traffic));
@ -1131,7 +1131,7 @@ static int fill_path_record(ibcm_module_t *m,
static int ibcm_endpoint_init(struct mca_btl_base_endpoint_t *endpoint)
{
ibcm_endpoint_t *ie = endpoint->endpoint_local_cpc_data =
ibcm_endpoint_t *ie = endpoint->endpoint_local_cpc_data =
calloc(1, sizeof(ibcm_endpoint_t));
if (NULL == ie) {
BTL_ERROR(("malloc failed!"));
@ -1141,7 +1141,7 @@ static int ibcm_endpoint_init(struct mca_btl_base_endpoint_t *endpoint)
BTL_VERBOSE(("endpoint %p / %p", (void*)endpoint, (void*)ie));
ie->ie_cpc = endpoint->endpoint_local_cpc;
ie->ie_endpoint = endpoint;
ie->ie_qps_created =
ie->ie_qps_created =
ie->ie_recv_buffers_posted = false;
ie->ie_qps_to_connect = mca_btl_openib_component.num_qps;
@ -1159,12 +1159,12 @@ static int ibcm_endpoint_init(struct mca_btl_base_endpoint_t *endpoint)
static bool i_initiate(ibcm_module_t *m,
mca_btl_openib_endpoint_t *endpoint)
{
modex_msg_t *msg =
modex_msg_t *msg =
(modex_msg_t*) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
uint64_t my_port_guid = ntoh64(m->btl->device->ib_dev_attr.node_guid) +
uint64_t my_port_guid = ntoh64(m->btl->device->ib_dev_attr.node_guid) +
m->btl->port_num;
uint64_t service_id = m->cmh->param.service_id;
BTL_VERBOSE(("i_initiate: my guid (%0" PRIx64 "), msg guid (%0" PRIx64 ")",
my_port_guid, msg->mm_port_guid));
BTL_VERBOSE(("i_initiate: my service id (%d), msg service id (%d)",
@ -1172,7 +1172,7 @@ static bool i_initiate(ibcm_module_t *m,
return
(my_port_guid == msg->mm_port_guid &&
service_id < msg->mm_service_id) ? true :
service_id < msg->mm_service_id) ? true :
(my_port_guid < msg->mm_port_guid) ? true : false;
}
@ -1187,11 +1187,11 @@ static ibcm_request_t *alloc_request(ibcm_module_t *m, modex_msg_t *msg,
struct ib_cm_req_param *cm_req;
ibcm_request_t *req = OBJ_NEW(ibcm_request_t);
BTL_VERBOSE(("allocated cached req id: 0x%" PRIx64, (void*)req));
if (NULL == req) {
return NULL;
}
/* Create this CM ID */
if (0 != ib_cm_create_id(m->cmh->cm_device,
&(req->super.cm_id),
@ -1201,11 +1201,11 @@ static ibcm_request_t *alloc_request(ibcm_module_t *m, modex_msg_t *msg,
return NULL;
}
BTL_VERBOSE(("created CM ID 0x%" PRIx64, &(req->super.cm_id)));
/* This data is constant for all the QP's */
req->path_rec = *path_rec;
req->endpoint = endpoint;
cm_req = &(req->cm_req);
cm_req->qp_type = IBV_QPT_RC;
cm_req->alternate_path = NULL;
@ -1220,7 +1220,7 @@ static ibcm_request_t *alloc_request(ibcm_module_t *m, modex_msg_t *msg,
cm_req->remote_cm_response_timeout = 20;
cm_req->local_cm_response_timeout = 20;
cm_req->max_cm_retries = 5;
req->private_data.ireqd_pid = m->cmh->param.service_id;
req->private_data.ireqd_ep_index = endpoint->index;
@ -1249,21 +1249,21 @@ static void print_req(struct ib_cm_req_param *cm_req)
BTL_VERBOSE(("cm_req->max_cm_retries: %d", cm_req->max_cm_retries));
BTL_VERBOSE(("cm_req->srq: %d", cm_req->srq));
}
static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
mca_btl_base_endpoint_t *endpoint)
{
int i, rc;
ibcm_module_t *m = (ibcm_module_t *) cpc;
ibcm_endpoint_t *ie =
ibcm_endpoint_t *ie =
(ibcm_endpoint_t *) endpoint->endpoint_local_cpc_data;
modex_msg_t *msg =
modex_msg_t *msg =
(modex_msg_t*) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
struct ibv_sa_path_rec path_rec;
bool do_initiate;
TIMER_START(START_CONNECT);
BTL_VERBOSE(("endpoint %p (lid %d, ep index %d)",
BTL_VERBOSE(("endpoint %p (lid %d, ep index %d)",
(void*)endpoint, endpoint->endpoint_btl->port_info.lid,
endpoint->index));
@ -1291,7 +1291,7 @@ static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
rc = OMPI_ERR_NOT_FOUND;
goto err;
}
/* If we're not the initiator, make a bogus QP (must be done
before we make all the other QPs) */
@ -1303,14 +1303,14 @@ static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
}
ie->ie_bogus_qp = endpoint->qps[0].qp->lcl_qp;
}
/* Make the local side of all the QP's */
if (OMPI_SUCCESS != (rc = qp_create_all(endpoint, m))) {
goto err;
}
/* Check initiation direction (see comment above i_initiate()
function):
function):
- if this is the side that is not supposed to initiate, then
send a single bogus request that we expect to be rejected.
@ -1365,12 +1365,12 @@ static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
cm_req->srq = BTL_OPENIB_QP_TYPE_SRQ(i);
cm_req->qp_num = endpoint->qps[i].qp->lcl_qp->qp_num;
cm_req->starting_psn = endpoint->qps[i].qp->lcl_psn;
BTL_VERBOSE(("sending my qpn %d, psn %d",
BTL_VERBOSE(("sending my qpn %d, psn %d",
cm_req->qp_num, cm_req->starting_psn));
req->private_data.ireqd_request = req;
req->private_data.ireqd_qp_index = i;
/* Send the request */
BTL_VERBOSE(("sending connect request %d of %d (id %p)",
i, mca_btl_openib_component.num_qps,
@ -1409,7 +1409,7 @@ static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
cm_req->srq = 0;
cm_req->qp_num = ie->ie_bogus_qp->qp_num;
cm_req->starting_psn = 0;
BTL_VERBOSE(("sending BOGUS qpn %d, psn %d (id %p)",
BTL_VERBOSE(("sending BOGUS qpn %d, psn %d (id %p)",
cm_req->qp_num, cm_req->starting_psn,
(void*)req->super.cm_id));
@ -1472,7 +1472,7 @@ static void ibcm_listen_cm_id_destructor(ibcm_listen_cm_id_t *cmh)
/* Remove all the ibcm module items */
for (item = opal_list_remove_first(&(cmh->ibcm_modules));
NULL != item;
NULL != item;
item = opal_list_remove_first(&(cmh->ibcm_modules))) {
OBJ_RELEASE(item);
}
@ -1494,7 +1494,7 @@ static void ibcm_listen_cm_id_destructor(ibcm_listen_cm_id_t *cmh)
/* Stop monitoring the cm_device's fd (wait for it to be
released from the monitoring entity) */
ompi_btl_openib_fd_unmonitor(cmh->cm_device->fd,
ompi_btl_openib_fd_unmonitor(cmh->cm_device->fd,
callback_unlock,
(void*) &barrier);
@ -1527,7 +1527,7 @@ static void ibcm_listen_cm_id_destructor(ibcm_listen_cm_id_t *cmh)
/* Close the CM device */
if (NULL != cmh->cm_device) {
OPAL_OUTPUT((-1, "closing ibcm device 0x%" PRIx64 " (%s)",
(uint64_t) cmh->cm_device,
(uint64_t) cmh->cm_device,
ibv_get_device_name(cmh->ib_context->device)));
ib_cm_close_device(cmh->cm_device);
}
@ -1565,7 +1565,7 @@ static int ibcm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
ibcm_endpoint_t *ie =
(ibcm_endpoint_t *) endpoint->endpoint_local_cpc_data;
BTL_VERBOSE(("endpoint %p", (void*)endpoint));
/* Free the stuff we allocated in ibcm_module_init */
if (NULL != ie) {
int i;
@ -1603,7 +1603,7 @@ static int ibcm_module_finalize(mca_btl_openib_module_t *btl,
if (NULL != m && NULL != m->cmh) {
OBJ_RELEASE(m->cmh);
}
return OMPI_SUCCESS;
}
@ -1655,7 +1655,7 @@ static int qp_to_rtr(int qp_index, struct ib_cm_id *cm_id,
(mtu == IBV_MTU_2048) ? "2048" :
(mtu == IBV_MTU_4096) ? "4096" :
"unknown (!)"));
/* Move the QP into the INIT state */
memset(&attr, 0, sizeof(attr));
attr.qp_state = IBV_QPS_INIT;
@ -1665,9 +1665,9 @@ static int qp_to_rtr(int qp_index, struct ib_cm_id *cm_id,
}
if (0 != ibv_modify_qp(qp, &attr, attr_mask)) {
BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno)));
BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno)));
return OMPI_ERROR;
}
}
/* Move the QP into the RTR state */
attr.qp_state = IBV_QPS_RTR;
@ -1687,7 +1687,7 @@ static int qp_to_rtr(int qp_index, struct ib_cm_id *cm_id,
/* IBCM does not set these values for us */
attr.max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
attr.min_rnr_timer = mca_btl_openib_component.ib_min_rnr_timer;
if (0 != ibv_modify_qp(qp, &attr,
attr_mask |
IBV_QP_PATH_MTU |
@ -1696,9 +1696,9 @@ static int qp_to_rtr(int qp_index, struct ib_cm_id *cm_id,
)) {
BTL_ERROR(("error modifing QP to RTR errno says %s",
strerror(errno)));
return OMPI_ERROR;
return OMPI_ERROR;
}
/* All done */
TIMER_STOP(QP_TO_RTR);
return OMPI_SUCCESS;
@ -1727,9 +1727,9 @@ static int qp_to_rts(int qp_index, struct ib_cm_id *cm_id,
if (0 != (rc = ibv_modify_qp(qp, &attr, attr_mask))) {
BTL_ERROR(("error modifing QP (index %d) to RTS errno says %s; rc=%d, errno=%d",
qp_index, strerror(errno), rc, errno));
return OMPI_ERROR;
return OMPI_ERROR;
}
/* All done */
BTL_VERBOSE(("successfully set RTS"));
TIMER_STOP(QP_TO_RTS);
@ -1742,7 +1742,7 @@ static int qp_to_rts(int qp_index, struct ib_cm_id *cm_id,
*/
static void *callback_start_connect(void *context)
{
callback_start_connect_data_t *cbdata =
callback_start_connect_data_t *cbdata =
(callback_start_connect_data_t *) context;
BTL_VERBOSE(("ibcm scheduled callback: calling start_connect()"));
@ -1763,7 +1763,7 @@ static void *callback_start_connect(void *context)
/*
* Passive has received a connection request from an active
*/
static int request_received(ibcm_listen_cm_id_t *cmh,
static int request_received(ibcm_listen_cm_id_t *cmh,
struct ib_cm_event *event)
{
int i, rc = OMPI_ERROR;
@ -1794,7 +1794,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
device event, and have to find the ibcm_module_t (i.e., local
port/openib BTL module) that corresponds to it. */
BTL_VERBOSE(("looking for ibcm module -- source port guid: 0x%" PRIx64 " (%p)",
ntoh64(req->primary_path->sgid.global.interface_id),
ntoh64(req->primary_path->sgid.global.interface_id),
(void*)cmh));
for (item = opal_list_get_first(&(cmh->ibcm_modules));
item != opal_list_get_end(&(cmh->ibcm_modules));
@ -1820,7 +1820,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
for (found = false, ib_proc = (mca_btl_openib_proc_t*)
opal_list_get_first(&mca_btl_openib_component.ib_procs);
!found &&
!found &&
ib_proc != (mca_btl_openib_proc_t*)
opal_list_get_end(&mca_btl_openib_component.ib_procs);
ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
@ -1828,7 +1828,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
/* Now cycle through all the endpoints on that proc */
for (i = 0; !found && i < (int) ib_proc->proc_endpoint_count; ++i) {
BTL_VERBOSE(("checking endpoint %d of %d (ep %p, cpc data %p)",
i, (int) ib_proc->proc_endpoint_count,
i, (int) ib_proc->proc_endpoint_count,
(void*)ib_proc->proc_endpoints[i],
(void*)ib_proc->proc_endpoints[i]->endpoint_remote_cpc_data));
if (NULL == ib_proc->proc_endpoints[i]->endpoint_remote_cpc_data) {
@ -1841,7 +1841,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
BTL_VERBOSE(("my LID %d, remote LID %d",
msg->mm_lid,
ntohs(req->primary_path->dlid)));
if (msg->mm_port_guid ==
if (msg->mm_port_guid ==
ntoh64(req->primary_path->dgid.global.interface_id) &&
msg->mm_service_id == active_private_data->ireqd_pid &&
msg->mm_port_num == req->port &&
@ -1885,7 +1885,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
rc = OMPI_SUCCESS;
} else if (ie->ie_connection_flags & CFLAGS_ONGOING) {
/* See if the request for this QP already arrived */
if (ie->ie_qps_created &&
if (ie->ie_qps_created &&
IBV_QPS_RESET != endpoint->qps[qp_index].qp->lcl_qp->state) {
BTL_VERBOSE(("this QP (%d) already connected",
qp_index));
@ -1934,7 +1934,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
are filled in during qp_to_rtr (because we don't get them until
we call ib_cm_attr_init()). We already have the remote LID,
subnet ID, and MTU from the port's modex message. */
endpoint->rem_info.rem_qps[qp_index].rem_psn =
endpoint->rem_info.rem_qps[qp_index].rem_psn =
event->param.req_rcvd.starting_psn;
endpoint->rem_info.rem_index = active_private_data->ireqd_ep_index;
@ -2030,14 +2030,14 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
rep->cm_rep.qp_num = endpoint->qps[qp_index].qp->lcl_qp->qp_num;
rep->cm_rep.srq = BTL_OPENIB_QP_TYPE_SRQ(qp_index);
rep->cm_rep.starting_psn = endpoint->qps[qp_index].qp->lcl_psn;
BTL_VERBOSE(("setting reply psn %d",
BTL_VERBOSE(("setting reply psn %d",
rep->cm_rep.starting_psn));
rep->cm_rep.responder_resources = req->responder_resources;
rep->cm_rep.initiator_depth = req->initiator_depth;
rep->cm_rep.target_ack_delay = 20;
rep->cm_rep.flow_control = req->flow_control;
rep->cm_rep.rnr_retry_count = req->rnr_retry_count;
rep->private_data.irepd_request = active_private_data->ireqd_request;
rep->private_data.irepd_reply = rep;
rep->private_data.irepd_qp_index = qp_index;
@ -2050,7 +2050,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
goto reject;
}
opal_list_append(&ibcm_pending_replies, &(rep->super.super));
TIMER_STOP(REQUEST_RECEIVED);
BTL_VERBOSE(("sent reply for qp index %d", qp_index));
return OMPI_SUCCESS;
@ -2058,21 +2058,21 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
reject:
/* Reject the request */
BTL_VERBOSE(("rejecting request"));
ib_cm_send_rej(event->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
ib_cm_send_rej(event->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
&rej_reason, sizeof(rej_reason),
event->private_data, sizeof(ibcm_req_data_t));
/* If we rejected because of the wrong direction, then initiate a
connection going the other direction. */
if (REJ_WRONG_DIRECTION == rej_reason) {
callback_start_connect_data_t *cbdata = malloc(sizeof(*cbdata));
if (NULL != cbdata) {
cbdata->cscd_cpc =
cbdata->cscd_cpc =
(ompi_btl_openib_connect_base_module_t *) imodule;
cbdata->cscd_endpoint = endpoint;
BTL_VERBOSE(("starting connect in other direction"));
ompi_btl_openib_fd_run_in_main(callback_start_connect, cbdata);
TIMER_STOP(REQUEST_RECEIVED);
return OMPI_SUCCESS;
}
@ -2085,7 +2085,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
endpoint);
return rc;
}
/*
* Callback (from main thread) when the endpoint has been connected
*/
@ -2102,8 +2102,8 @@ static void *callback_set_endpoint_cpc_complete(void *context)
/*
* Helper function to find a cached CM ID in a list
*/
static ibcm_base_cm_id_t *find_cm_id(struct ib_cm_id *cm_id,
*/
static ibcm_base_cm_id_t *find_cm_id(struct ib_cm_id *cm_id,
opal_list_t *list)
{
opal_list_item_t *item;
@ -2139,7 +2139,7 @@ static int reply_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
p->irepd_qp_index, (void*) endpoint));
ie = (ibcm_endpoint_t*) endpoint->endpoint_local_cpc_data;
endpoint->rem_info.rem_qps[p->irepd_qp_index].rem_psn =
endpoint->rem_info.rem_qps[p->irepd_qp_index].rem_psn =
event->param.rep_rcvd.starting_psn;
endpoint->rem_info.rem_index = p->irepd_ep_index;
@ -2253,7 +2253,7 @@ static int ready_to_use_received(ibcm_listen_cm_id_t *h,
static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
{
enum ib_cm_rej_reason reason = event->param.rej_rcvd.reason;
ibcm_reject_reason_t *rej_reason =
ibcm_reject_reason_t *rej_reason =
(ibcm_reject_reason_t *) event->param.rej_rcvd.ari;
TIMER_START(REJECT_RECEIVED);
@ -2268,7 +2268,7 @@ static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
(ibcm_req_data_t*) event->private_data;
ibcm_request_t *request = my_private_data->ireqd_request;
mca_btl_openib_endpoint_t *endpoint = request->endpoint;
ibcm_endpoint_t *ie = (ibcm_endpoint_t*)
ibcm_endpoint_t *ie = (ibcm_endpoint_t*)
endpoint->endpoint_local_cpc_data;
BTL_VERBOSE(("got WRONG_DIRECTION reject, endpoint: %p, pid %d, ep_index %d, qp_index %d",
@ -2283,9 +2283,9 @@ static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
/* Remove from the global pending_requests list because we
no longer need to handle errors for it */
BTL_VERBOSE(("reply received cm id %p -- original cached req %p",
(void*)cmh->listen_cm_id,
(void*)cmh->listen_cm_id,
(void*)request));
opal_list_remove_item(&ibcm_pending_requests,
opal_list_remove_item(&ibcm_pending_requests,
&(request->super.super));
/* We ack the event and then destroy the CM ID (you *must*
@ -2326,13 +2326,13 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
if (IBV_WC_RESP_TIMEOUT_ERR != event->param.send_status) {
orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
"unhandled error", true,
"request", orte_process_info.nodename,
"request", orte_process_info.nodename,
event->param.send_status);
} else {
ibcm_request_t *req;
BTL_ERROR(("Got timeout in IBCM request (CM ID: %p)",
BTL_ERROR(("Got timeout in IBCM request (CM ID: %p)",
(void*)event->cm_id));
req = (ibcm_request_t*) find_cm_id(event->cm_id,
req = (ibcm_request_t*) find_cm_id(event->cm_id,
&ibcm_pending_requests);
if (NULL == req) {
orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
@ -2345,7 +2345,7 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
/* Communicate to the upper layer that the connection on this
endpoint has failed */
ompi_btl_openib_fd_run_in_main(mca_btl_openib_endpoint_invoke_error,
ompi_btl_openib_fd_run_in_main(mca_btl_openib_endpoint_invoke_error,
endpoint);
return OMPI_SUCCESS;
}
@ -2358,13 +2358,13 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
if (IBV_WC_RESP_TIMEOUT_ERR != event->param.send_status) {
orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
"unhandled error", true,
"reply", orte_process_info.nodename,
"reply", orte_process_info.nodename,
event->param.send_status);
} else {
ibcm_reply_t *rep;
BTL_ERROR(("Got timeout in IBCM reply (id: %p)",
(void*)event->cm_id));
rep = (ibcm_reply_t*) find_cm_id(event->cm_id,
rep = (ibcm_reply_t*) find_cm_id(event->cm_id,
&ibcm_pending_replies);
if (NULL == rep) {
orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
@ -2377,7 +2377,7 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
/* Communicate to the upper layer that the connection on this
endpoint has failed */
ompi_btl_openib_fd_run_in_main(mca_btl_openib_endpoint_invoke_error,
ompi_btl_openib_fd_run_in_main(mca_btl_openib_endpoint_invoke_error,
endpoint);
return OMPI_SUCCESS;
}
@ -2390,7 +2390,7 @@ static void *ibcm_event_dispatch(int fd, int flags, void *context)
ibcm_listen_cm_id_t *cmh = (ibcm_listen_cm_id_t*) context;
struct ib_cm_event *e = NULL;
OPAL_OUTPUT((-1, "ibcm dispatch: on device 0x%" PRIx64", fd %d",
OPAL_OUTPUT((-1, "ibcm dispatch: on device 0x%" PRIx64", fd %d",
(uint64_t) cmh->cm_device, fd));
TIMER_START(CM_GET_EVENT);
/* Blocks until next event, which should be immediately (because
@ -2415,19 +2415,19 @@ static void *ibcm_event_dispatch(int fd, int flags, void *context)
/* Incoming request */
rc = request_received(cmh, e);
break;
case IB_CM_REP_RECEIVED:
OPAL_OUTPUT((-1, "ibcm dispatch: reply received on fd %d", fd));
/* Reply received */
rc = reply_received(cmh, e);
break;
case IB_CM_RTU_RECEIVED:
OPAL_OUTPUT((-1, "ibcm dispatch: RTU received on fd %d", fd));
/* Ready to use! */
rc = ready_to_use_received(cmh, e);
break;
case IB_CM_REJ_RECEIVED:
OPAL_OUTPUT((-1, "ibcm dispatch: reject received on fd %d", fd));
/* Rejected connection */
@ -2436,19 +2436,19 @@ static void *ibcm_event_dispatch(int fd, int flags, void *context)
ID could be freed */
want_ack = false;
break;
case IB_CM_REQ_ERROR:
OPAL_OUTPUT((-1, "ibcm dispatch: request error received on fd %d", fd));
/* Request error */
rc = request_error(cmh, e);
break;
case IB_CM_REP_ERROR:
OPAL_OUTPUT((-1, "ibcm dispatch: reply error received on fd %d", fd));
/* Reply error */
rc = reply_error(cmh, e);
break;
case IB_CM_DREQ_RECEIVED:
case IB_CM_DREP_RECEIVED:
case IB_CM_DREQ_ERROR:
@ -2459,7 +2459,7 @@ static void *ibcm_event_dispatch(int fd, int flags, void *context)
/* We don't care */
rc = OMPI_SUCCESS;
break;
default:
/* This would be odd */
OPAL_OUTPUT((-1, "ibcm dispatch: unhandled event received on fd %d", fd));

Просмотреть файл

@ -2,9 +2,9 @@
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/

Просмотреть файл

@ -5,21 +5,21 @@
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
* reserved.
* Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -39,7 +39,7 @@
#include "ompi/mca/dpm/dpm.h"
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#include "btl_openib_endpoint.h"
#include "btl_openib_proc.h"
#include "connect/connect.h"
#include "orte/util/show_help.h"
@ -88,7 +88,7 @@ static int oob_priority = 50;
static bool rml_recv_posted = false;
static void oob_component_register(void);
static int oob_component_query(mca_btl_openib_module_t *openib_btl,
static int oob_component_query(mca_btl_openib_module_t *openib_btl,
ompi_btl_openib_connect_base_module_t **cpc);
static int oob_component_finalize(void);
@ -102,14 +102,14 @@ static int qp_connect_all(mca_btl_base_endpoint_t* endpoint);
static int qp_create_all(mca_btl_base_endpoint_t* endpoint);
static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
struct ibv_srq *srq, uint32_t max_recv_wr, uint32_t max_send_wr);
static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
uint8_t message_type);
static void rml_send_cb(int status, orte_process_name_t* endpoint,
opal_buffer_t* buffer, orte_rml_tag_t tag,
static void rml_send_cb(int status, orte_process_name_t* endpoint,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
static void rml_recv_cb(int status, orte_process_name_t* process_name,
opal_buffer_t* buffer, orte_rml_tag_t tag,
static void rml_recv_cb(int status, orte_process_name_t* process_name,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
#if (ENABLE_DYNAMIC_SL)
@ -172,7 +172,7 @@ static void oob_component_register(void)
* Init function. Post non-blocking RML receive to accept incoming
* connection requests.
*/
static int oob_component_query(mca_btl_openib_module_t *btl,
static int oob_component_query(mca_btl_openib_module_t *btl,
ompi_btl_openib_connect_base_module_t **cpc)
{
int rc;
@ -180,7 +180,7 @@ static int oob_component_query(mca_btl_openib_module_t *btl,
/* If we have the transport_type member, check to ensure we're on
IB (this CPC will not work with iWarp). If we do not have the
transport_type member, then we must be < OFED v1.2, and
therefore we must be IB. */
therefore we must be IB. */
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
if (BTL_OPENIB_CONNECT_BASE_CHECK_IF_NOT_IB(btl)) {
opal_output_verbose(5, mca_btl_base_output,
@ -202,7 +202,7 @@ static int oob_component_query(mca_btl_openib_module_t *btl,
ensure to only post it *once*, because another btl may have
come in before this and already posted it. */
if (!rml_recv_posted) {
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
OMPI_RML_TAG_OPENIB,
ORTE_RML_PERSISTENT,
rml_recv_cb,
@ -261,7 +261,7 @@ static int oob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
if (OMPI_SUCCESS !=
(rc = send_connect_data(endpoint, ENDPOINT_CONNECT_REQUEST))) {
BTL_ERROR(("error sending connect request, error code %d", rc));
BTL_ERROR(("error sending connect request, error code %d", rc));
return rc;
}
@ -330,7 +330,7 @@ static int reply_start_connect(mca_btl_openib_endpoint_t *endpoint,
/* Set the remote side info */
set_remote_info(endpoint, rem_info);
/* Connect to remote endpoint qp's */
if (OMPI_SUCCESS != (rc = qp_connect_all(endpoint))) {
return rc;
@ -352,14 +352,14 @@ static int set_remote_info(mca_btl_base_endpoint_t* endpoint,
mca_btl_openib_rem_info_t* rem_info)
{
/* copy the rem_info stuff */
memcpy(&((mca_btl_openib_endpoint_t*) endpoint)->rem_info,
rem_info, sizeof(mca_btl_openib_rem_info_t));
memcpy(&((mca_btl_openib_endpoint_t*) endpoint)->rem_info,
rem_info, sizeof(mca_btl_openib_rem_info_t));
/* copy over the rem qp info */
memcpy(endpoint->rem_info.rem_qps,
rem_info->rem_qps, sizeof(mca_btl_openib_rem_qp_info_t) *
rem_info->rem_qps, sizeof(mca_btl_openib_rem_qp_info_t) *
mca_btl_openib_component.num_qps);
BTL_VERBOSE(("Setting QP info, LID = %d", endpoint->rem_info.rem_lid));
return ORTE_SUCCESS;
@ -433,7 +433,7 @@ static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint)
IBV_QP_MIN_RNR_TIMER)) {
BTL_ERROR(("error modifing QP to RTR errno says %s",
strerror(errno)));
return OMPI_ERROR;
return OMPI_ERROR;
}
attr.qp_state = IBV_QPS_RTS;
attr.timeout = mca_btl_openib_component.ib_timeout;
@ -482,7 +482,7 @@ static int qp_create_all(mca_btl_base_endpoint_t* endpoint)
if(0 == pp_qp_num && true == endpoint->use_eager_rdma)
pp_qp_num = 1;
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
struct ibv_srq *srq = NULL;
uint32_t max_recv_wr, max_send_wr;
int32_t rd_rsv, rd_num_credits;
@ -540,7 +540,7 @@ static uint32_t max_inline_size(int qp, mca_btl_openib_device_t *device)
* Create the local side of one qp. The remote side will be connected
* later.
*/
static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
struct ibv_srq *srq, uint32_t max_recv_wr, uint32_t max_send_wr)
{
mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
@ -556,7 +556,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
init_attr.send_cq = openib_btl->device->ib_cq[BTL_OPENIB_LP_CQ];
init_attr.recv_cq = openib_btl->device->ib_cq[qp_cq_prio(qp)];
init_attr.srq = srq;
init_attr.cap.max_inline_data = req_inline =
init_attr.cap.max_inline_data = req_inline =
max_inline_size(qp, openib_btl->device);
init_attr.cap.max_send_sge = 1;
init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
@ -567,11 +567,11 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
}
init_attr.cap.max_send_wr = max_send_wr;
my_qp = ibv_create_qp(openib_btl->device->ib_pd, &init_attr);
if (NULL == my_qp) {
BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
return OMPI_ERROR;
my_qp = ibv_create_qp(openib_btl->device->ib_pd, &init_attr);
if (NULL == my_qp) {
BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
return OMPI_ERROR;
}
endpoint->qps[qp].qp->lcl_qp = my_qp;
@ -585,21 +585,21 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
} else {
endpoint->qps[qp].ib_inline_max = req_inline;
}
attr.qp_state = IBV_QPS_INIT;
attr.pkey_index = openib_btl->pkey_index;
attr.port_num = openib_btl->port_num;
attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
if (ibv_modify_qp(endpoint->qps[qp].qp->lcl_qp,
&attr,
IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS )) {
BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno)));
return OMPI_ERROR;
}
if (ibv_modify_qp(endpoint->qps[qp].qp->lcl_qp,
&attr,
IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS )) {
BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno)));
return OMPI_ERROR;
}
/* Setup meta data on the endpoint */
endpoint->qps[qp].qp->lcl_psn = lrand48() & 0xffffff;
@ -612,18 +612,18 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
/*
* RML send connect information to remote endpoint
*/
static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
uint8_t message_type)
{
opal_buffer_t* buffer = OBJ_NEW(opal_buffer_t);
int rc;
if (NULL == buffer) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* pack the info in the send buffer */
/* pack the info in the send buffer */
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8));
rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8);
if (ORTE_SUCCESS != rc) {
@ -659,7 +659,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
if (message_type != ENDPOINT_CONNECT_ACK) {
int qp;
/* stuff all the QP info into the buffer */
for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num,
1, OPAL_UINT32);
@ -669,13 +669,13 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
}
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1,
OPAL_UINT32);
OPAL_UINT32);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
@ -698,7 +698,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
}
/* send to remote endpoint */
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name,
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name,
buffer, OMPI_RML_TAG_OPENIB, 0,
rml_send_cb, NULL);
if (ORTE_SUCCESS != rc) {
@ -706,7 +706,7 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
return rc;
}
BTL_VERBOSE(("Sent QP Info, LID = %d, SUBNET = %" PRIx64 "\n",
endpoint->endpoint_btl->lid,
endpoint->endpoint_btl->lid,
endpoint->subnet_id));
return OMPI_SUCCESS;
@ -717,8 +717,8 @@ static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
* Callback when we have finished RML sending the connect data to a
* remote peer
*/
static void rml_send_cb(int status, orte_process_name_t* endpoint,
opal_buffer_t* buffer, orte_rml_tag_t tag,
static void rml_send_cb(int status, orte_process_name_t* endpoint,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
OBJ_RELEASE(buffer);
@ -730,8 +730,8 @@ static void rml_send_cb(int status, orte_process_name_t* endpoint,
* and if this endpoint is trying to connect, reply with our QP info,
* otherwise try to modify QP's and establish reliable connection
*/
static void rml_recv_cb(int status, orte_process_name_t* process_name,
opal_buffer_t* buffer, orte_rml_tag_t tag,
static void rml_recv_cb(int status, orte_process_name_t* process_name,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
mca_btl_openib_proc_t *ib_proc;
@ -744,9 +744,9 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
mca_btl_openib_rem_info_t rem_info;
uint8_t message_type;
bool master;
/* start by unpacking data first so we know who is knocking at
our door */
/* start by unpacking data first so we know who is knocking at
our door */
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8));
rc = opal_dss.unpack(buffer, &message_type, &cnt, OPAL_UINT8);
if (ORTE_SUCCESS != rc) {
@ -754,7 +754,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
mca_btl_openib_endpoint_invoke_error(NULL);
return;
}
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64));
rc = opal_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, OPAL_UINT64);
if (ORTE_SUCCESS != rc) {
@ -762,7 +762,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
mca_btl_openib_endpoint_invoke_error(NULL);
return;
}
if (ENDPOINT_CONNECT_REQUEST != message_type) {
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &lcl_qp, &cnt, OPAL_UINT32);
@ -780,14 +780,14 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
}
}
if (ENDPOINT_CONNECT_ACK != message_type) {
int qp;
int qp;
/* get ready for the data */
rem_info.rem_qps =
(mca_btl_openib_rem_qp_info_t*) malloc(sizeof(mca_btl_openib_rem_qp_info_t) *
rem_info.rem_qps =
(mca_btl_openib_rem_qp_info_t*) malloc(sizeof(mca_btl_openib_rem_qp_info_t) *
mca_btl_openib_component.num_qps);
/* unpack all the qp info */
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt,
OPAL_UINT32);
@ -805,7 +805,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
return;
}
}
BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
rc = opal_dss.unpack(buffer, &rem_info.rem_lid, &cnt, OPAL_UINT16);
if (ORTE_SUCCESS != rc) {
@ -828,14 +828,14 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
return;
}
}
BTL_VERBOSE(("Received QP Info, LID = %d, SUBNET = %" PRIx64 "\n",
rem_info.rem_lid,
rem_info.rem_lid,
rem_info.rem_subnet_id));
master = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME,
process_name) > 0 ? true : false;
/* Need to protect the ib_procs list */
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
@ -845,16 +845,16 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
opal_list_get_end(&mca_btl_openib_component.ib_procs);
ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
bool found = false;
if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&ib_proc->proc_ompi->proc_name, process_name) != OPAL_EQUAL) {
continue;
}
if (ENDPOINT_CONNECT_REQUEST != message_type) {
/* This is a reply message. Try to get the endpoint
instance the reply belongs to */
for (i = 0; i < ib_proc->proc_endpoint_count; i++) {
for (i = 0; i < ib_proc->proc_endpoint_count; i++) {
ib_endpoint = ib_proc->proc_endpoints[i];
if (ib_endpoint->qps[0].qp->lcl_qp != NULL &&
lcl_lid == ib_endpoint->endpoint_btl->lid &&
@ -872,7 +872,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
mca_btl_openib_endpoint_t *ib_endpoint_found = NULL;
int master_first_closed = -1;
for (i = 0; i < ib_proc->proc_endpoint_count; i++) {
for (i = 0; i < ib_proc->proc_endpoint_count; i++) {
ib_endpoint = ib_proc->proc_endpoints[i];
if (ib_endpoint->subnet_id != rem_info.rem_subnet_id ||
(ib_endpoint->endpoint_state != MCA_BTL_IB_CONNECTING
@ -894,7 +894,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
break; /* Found one. No point to continue */
}
ib_endpoint = ib_endpoint_found;
if (found && master &&
MCA_BTL_IB_CLOSED == ib_endpoint->endpoint_state ) {
/* since this is master and no endpoints found in
@ -912,17 +912,17 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
return;
}
}
if (!found) {
BTL_ERROR(("can't find suitable endpoint for this peer\n"));
BTL_ERROR(("can't find suitable endpoint for this peer\n"));
mca_btl_openib_endpoint_invoke_error(NULL);
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
return;
return;
}
OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
endpoint_state = ib_endpoint->endpoint_state;
/* Update status */
switch (endpoint_state) {
case MCA_BTL_IB_CLOSED :
@ -933,17 +933,17 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
if (master) {
rc = reply_start_connect(ib_endpoint, &rem_info);
} else {
rc = oob_module_start_connect(ib_endpoint->endpoint_local_cpc,
rc = oob_module_start_connect(ib_endpoint->endpoint_local_cpc,
ib_endpoint);
}
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("error in endpoint reply start connect"));
mca_btl_openib_endpoint_invoke_error(ib_endpoint);
OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
break;
}
/* As long as we expect a message from the peer (in order
to setup the connection) let the event engine poll the
RML events. Note: we increment it once peer active
@ -951,16 +951,16 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
opal_progress_event_users_increment();
OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
break;
case MCA_BTL_IB_CONNECTING :
set_remote_info(ib_endpoint, &rem_info);
if (OMPI_SUCCESS != (rc = qp_connect_all(ib_endpoint))) {
BTL_ERROR(("endpoint connect error: %d", rc));
BTL_ERROR(("endpoint connect error: %d", rc));
mca_btl_openib_endpoint_invoke_error(ib_endpoint);
OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
break;
}
if (master) {
ib_endpoint->endpoint_state = MCA_BTL_IB_WAITING_ACK;
@ -974,20 +974,20 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
/* cpc complete unlock the endpoint */
}
break;
case MCA_BTL_IB_WAITING_ACK:
/* Tell main BTL that we're done */
mca_btl_openib_endpoint_cpc_complete(ib_endpoint);
/* cpc complete unlock the endpoint */
break;
case MCA_BTL_IB_CONNECT_ACK:
send_connect_data(ib_endpoint, ENDPOINT_CONNECT_ACK);
/* Tell main BTL that we're done */
mca_btl_openib_endpoint_cpc_complete(ib_endpoint);
/* cpc complete unlock the endpoint */
break;
case MCA_BTL_IB_CONNECTED:
OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
break;

Просмотреть файл

@ -2,9 +2,9 @@
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/

Просмотреть файл

@ -108,7 +108,7 @@ typedef struct {
static void rdmacm_contents_constructor(rdmacm_contents_t *contents);
static void rdmacm_contents_destructor(rdmacm_contents_t *contents);
OBJ_CLASS_INSTANCE(rdmacm_contents_t, opal_list_item_t,
OBJ_CLASS_INSTANCE(rdmacm_contents_t, opal_list_item_t,
rdmacm_contents_constructor,
rdmacm_contents_destructor);
@ -142,7 +142,7 @@ typedef struct {
static void id_context_constructor(id_context_t *context);
static void id_context_destructor(id_context_t *context);
OBJ_CLASS_INSTANCE(id_context_t, opal_list_item_t,
OBJ_CLASS_INSTANCE(id_context_t, opal_list_item_t,
id_context_constructor,
id_context_destructor);
@ -225,7 +225,7 @@ static void rdmacm_contents_destructor(rdmacm_contents_t *contents)
/*
* Invoked by main thread
*
* Sets up any rdma_cm specific commandline params
* Sets up any rdma_cm specific commandline params
*/
static void rdmacm_component_register(void)
{
@ -288,7 +288,7 @@ static void rdmacm_component_register(void)
static char *stringify(uint32_t addr)
{
char *line = (char *) malloc(64);
asprintf(&line, "%d.%d.%d.%d (0x%x)",
asprintf(&line, "%d.%d.%d.%d (0x%x)",
#if defined(WORDS_BIGENDIAN)
(addr >> 24),
(addr >> 16) & 0xff,
@ -306,7 +306,7 @@ static char *stringify(uint32_t addr)
/*
* Invoked by service thread
*
*
* This function traverses the list of endpoints associated with the
* device and determines which of them the remote side is attempting
* to connect to. This is determined based on the local endpoint's
@ -360,7 +360,7 @@ static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(rdmacm_contents_t *conten
}
/*
* Returns max inline size for qp #N
* Returns max inline size for qp #N
*/
static uint32_t max_inline_size(int qp, mca_btl_openib_device_t *device)
{
@ -425,7 +425,7 @@ static int rdmacm_setup_qp(rdmacm_contents_t *contents,
attr.cap.max_recv_wr = 0;
}
attr.cap.max_send_wr = max_send_wr;
attr.cap.max_inline_data = req_inline =
attr.cap.max_inline_data = req_inline =
max_inline_size(qpnum, contents->openib_btl->device);
attr.cap.max_send_sge = 1;
attr.cap.max_recv_sge = 1; /* we do not use SG list */
@ -473,7 +473,7 @@ out:
}
/*
/*
* Invoked by both main and service threads
*
* To avoid all kinds of nasty race conditions, we only allow
@ -490,7 +490,7 @@ static bool i_initiate(uint32_t local_ipaddr, uint16_t local_port,
char *a = stringify(local_ipaddr);
char *b = stringify(remote_ipaddr);
#endif
if (local_ipaddr > remote_ipaddr ||
(local_ipaddr == remote_ipaddr && local_port < remote_port)) {
OPAL_OUTPUT((-1, "i_initiate (I WIN): local ipaddr %s, remote ipaddr %s",
@ -564,10 +564,10 @@ static int rdmacm_client_connect_one(rdmacm_contents_t *contents,
* RDMA_CM_EVENT_ADDR_RESOLVED event will occur on the local event
* handler.
*/
OPAL_OUTPUT((-1, "MAIN Resolving id: from IP %s:%d to IP %s:%d",
a = stringify(contents->ipaddr),
OPAL_OUTPUT((-1, "MAIN Resolving id: from IP %s:%d to IP %s:%d",
a = stringify(contents->ipaddr),
contents->tcp_port,
b = stringify(message->ipaddr),
b = stringify(message->ipaddr),
message->tcp_port));
#if OPAL_ENABLE_DEBUG
free(a);
@ -629,7 +629,7 @@ out:
return OMPI_ERROR;
}
/*
/*
* Invoked by main thread
*
* Connect method called by the upper layers to connect the local
@ -651,7 +651,7 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
invoked from the event_handler (to initiate connections in the
Right direction), where we don't have the CPC, so it'll be
NULL. */
local_message =
local_message =
(modex_message_t *) endpoint->endpoint_local_cpc->data.cbm_modex_message;
message = (modex_message_t *)
endpoint->endpoint_remote_cpc_data->cbm_modex_message;
@ -698,8 +698,8 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
/* Are we the initiator? Or do we expect this connect request to
be rejected? */
endpoint->endpoint_initiator =
i_initiate(contents->ipaddr, contents->tcp_port,
endpoint->endpoint_initiator =
i_initiate(contents->ipaddr, contents->tcp_port,
message->ipaddr, message->tcp_port);
OPAL_OUTPUT((-1, "MAIN Start connect; ep=%p (%p), I %s the initiator to %s",
(void*) endpoint,
@ -711,7 +711,7 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
if (contents->endpoint->endpoint_initiator) {
/* Initiator needs a CTS frag (non-initiator will have a CTS
frag allocated later) */
if (OMPI_SUCCESS !=
if (OMPI_SUCCESS !=
(rc = ompi_btl_openib_connect_base_alloc_cts(contents->endpoint))) {
BTL_ERROR(("Failed to alloc CTS frag"));
goto out;
@ -720,7 +720,7 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
rc = rdmacm_client_connect_one(contents, message, qp);
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("rdmacm_client_connect_one error (real QP %d)",
BTL_ERROR(("rdmacm_client_connect_one error (real QP %d)",
qp));
goto out;
}
@ -750,7 +750,7 @@ out:
static void *show_help_cant_find_endpoint(void *context)
{
char *msg;
cant_find_endpoint_context_t *c =
cant_find_endpoint_context_t *c =
(cant_find_endpoint_context_t*) context;
if (NULL != c) {
@ -774,7 +774,7 @@ static void *show_help_cant_find_endpoint(void *context)
return NULL;
}
/*
/*
* Invoked by service thread
*
* The server thread will handle the incoming connection requests and
@ -822,12 +822,12 @@ static int handle_connect_request(struct rdma_cm_event *event)
}
message = (modex_message_t *) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
endpoint->endpoint_initiator =
endpoint->endpoint_initiator =
i_initiate(contents->ipaddr, contents->tcp_port,
message->ipaddr, rem_port);
BTL_VERBOSE(("ep state = %d, local ipaddr = %x, remote ipaddr = %x, local port = %d, remote port = %d",
endpoint->endpoint_state, contents->ipaddr, message->ipaddr,
endpoint->endpoint_state, contents->ipaddr, message->ipaddr,
contents->tcp_port, rem_port));
OPAL_OUTPUT((-1, "SERVICE in handle_connect_request; ep=%p (%p), I still %s the initiator to %s",
@ -879,7 +879,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
if (mca_btl_openib_component.credits_qp == qpnum) {
struct ibv_recv_wr *bad_wr, *wr;
if (OMPI_SUCCESS !=
if (OMPI_SUCCESS !=
ompi_btl_openib_connect_base_alloc_cts(endpoint)) {
BTL_ERROR(("Failed to alloc CTS frag"));
goto out1;
@ -888,7 +888,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
assert(NULL != wr);
wr->next = NULL;
if (0 != ibv_post_recv(endpoint->qps[qpnum].qp->lcl_qp,
if (0 != ibv_post_recv(endpoint->qps[qpnum].qp->lcl_qp,
wr, &bad_wr)) {
BTL_ERROR(("failed to post CTS recv buffer"));
goto out1;
@ -923,10 +923,10 @@ static int handle_connect_request(struct rdma_cm_event *event)
/* See rdma_connect(3) for a description of these 2 values. We
ensure to pass these values around via the modex so that we can
compute the values properly. */
conn_param.responder_resources =
conn_param.responder_resources =
mymin(contents->openib_btl->device->ib_dev_attr.max_qp_rd_atom,
message->device_max_qp_init_rd_atom);
conn_param.initiator_depth =
conn_param.initiator_depth =
mymin(contents->openib_btl->device->ib_dev_attr.max_qp_init_rd_atom,
message->device_max_qp_rd_atom);
conn_param.retry_count = mca_btl_openib_component.ib_retry_count;
@ -1001,9 +1001,9 @@ static void *call_disconnect_callback(void *v)
return NULL;
}
/*
/*
* Invoked by main thread
*
*
* Runs *while* the progress thread is running. We can't stop the
* progress thread because this function may be invoked to kill a
* specific endpoint that was the result of MPI-2 dynamics (i.e., this
@ -1033,12 +1033,12 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
opal_mutex_lock(&client_list_lock);
num_to_wait_for = disconnect_callbacks = 0;
for (item = opal_list_get_first(&client_list);
item != opal_list_get_end(&client_list);
item != opal_list_get_end(&client_list);
item = opal_list_get_next(item)) {
rdmacm_contents_t *contents = (rdmacm_contents_t *) item;
if (endpoint == contents->endpoint) {
while (NULL !=
while (NULL !=
(item2 = opal_list_remove_first(&(contents->ids)))) {
/* Fun race condition: we cannot call
rdma_disconnect() here in the main thread, because
@ -1131,7 +1131,7 @@ static int rdmacm_connect_endpoint(id_context_t *context,
BTL_ERROR(("Can't find endpoint"));
return OMPI_ERR_NOT_FOUND;
}
data =
data =
(rdmacm_endpoint_local_cpc_data_t *)endpoint->endpoint_local_cpc_data;
/* Only notify the upper layers after the last QP has been
@ -1210,7 +1210,7 @@ static int rdmacm_rejected(id_context_t *context, struct rdma_cm_event *event)
/* Why were we rejected? */
switch (*((reject_reason_t*) event->param.conn.private_data)) {
case REJECT_WRONG_DIRECTION:
OPAL_OUTPUT((-1, "SERVICE A good reject! for qp %d, id 0x%p",
OPAL_OUTPUT((-1, "SERVICE A good reject! for qp %d, id 0x%p",
context->qpnum, (void*) context->id));
rdmacm_destroy_dummy_qp(context);
break;
@ -1261,10 +1261,10 @@ out:
/*
* Runs in service thread
*/
static int create_dummy_cq(rdmacm_contents_t *contents,
static int create_dummy_cq(rdmacm_contents_t *contents,
mca_btl_openib_module_t *openib_btl)
{
contents->dummy_cq =
contents->dummy_cq =
ibv_create_cq(openib_btl->device->ib_dev_context, 1, NULL, NULL, 0);
if (NULL == contents->dummy_cq) {
BTL_ERROR(("dummy_cq not created"));
@ -1279,7 +1279,7 @@ out:
/*
* Runs in service thread
*/
static int create_dummy_qp(rdmacm_contents_t *contents,
static int create_dummy_qp(rdmacm_contents_t *contents,
struct rdma_cm_id *id, int qpnum)
{
struct ibv_qp_init_attr attr;
@ -1347,7 +1347,7 @@ static int finish_connect(id_context_t *context)
/* If we're the initiator, then setup the QP's and post the CTS
message buffer */
if (contents->endpoint->endpoint_initiator) {
rc = rdmacm_setup_qp(contents, contents->endpoint,
rc = rdmacm_setup_qp(contents, contents->endpoint,
context->id, context->qpnum);
if (0 != rc) {
BTL_ERROR(("rdmacm_setup_qp error %d", rc));
@ -1357,14 +1357,14 @@ static int finish_connect(id_context_t *context)
if (mca_btl_openib_component.credits_qp == context->qpnum) {
/* Post a single receive buffer on the smallest QP for the CTS
protocol */
struct ibv_recv_wr *bad_wr, *wr;
assert(NULL != contents->endpoint->endpoint_cts_frag.super.super.base.super.ptr);
wr = &(contents->endpoint->endpoint_cts_frag.rd_desc);
assert(NULL != wr);
wr->next = NULL;
if (0 != ibv_post_recv(contents->endpoint->qps[context->qpnum].qp->lcl_qp,
if (0 != ibv_post_recv(contents->endpoint->qps[context->qpnum].qp->lcl_qp,
wr, &bad_wr)) {
BTL_ERROR(("failed to post CTS recv buffer"));
goto out1;
@ -1399,10 +1399,10 @@ static int finish_connect(id_context_t *context)
memset(&conn_param, 0, sizeof(conn_param));
/* See above comment about rdma_connect(3) and these two values. */
conn_param.responder_resources =
conn_param.responder_resources =
mymin(contents->openib_btl->device->ib_dev_attr.max_qp_rd_atom,
message->device_max_qp_init_rd_atom);
conn_param.initiator_depth =
conn_param.initiator_depth =
mymin(contents->openib_btl->device->ib_dev_attr.max_qp_init_rd_atom,
message->device_max_qp_rd_atom);
conn_param.flow_control = 0;
@ -1456,7 +1456,7 @@ out:
return OMPI_ERROR;
}
/*
/*
* Runs in main thread
*/
static void *show_help_rdmacm_event_error(void *c)
@ -1566,14 +1566,14 @@ static int event_handler(struct rdma_cm_event *event)
found = false;
if (OMPI_SUCCESS == ompi_btl_openib_ini_query(attr->vendor_id,
attr->vendor_part_id,
&ini) &&
&ini) &&
ini.rdmacm_reject_causes_connect_error) {
found = true;
}
if (rdmacm_reject_causes_connect_error) {
found = true;
}
if (found) {
OPAL_OUTPUT((-1, "SERVICE Got CONNECT_ERROR, but ignored: %p", (void*) event->id));
rc = rdmacm_destroy_dummy_qp(context);
@ -1628,7 +1628,7 @@ static inline void rdmamcm_event_error(struct rdma_cm_event *event)
endpoint = ((id_context_t *)event->id->context)->contents->endpoint;
}
ompi_btl_openib_fd_run_in_main(mca_btl_openib_endpoint_invoke_error,
ompi_btl_openib_fd_run_in_main(mca_btl_openib_endpoint_invoke_error,
endpoint);
}
@ -1684,7 +1684,7 @@ static void *rdmacm_event_dispatch(int fd, int flags, void *context)
/*
* Runs in main thread
*
* CPC init function - Setup all globals here
* CPC init function - Setup all globals here
*/
static int rdmacm_init(mca_btl_openib_endpoint_t *endpoint)
{
@ -1716,14 +1716,14 @@ static int ipaddrcheck(id_context_t *context,
* up). Unfortunately, the subnet and IP address look up needs to match or
* there could be a mismatch if IP Aliases are being used. For more
* information on this, please read comment above
* mca_btl_openib_get_ip_subnet_id in btl_openib_ip.c
* mca_btl_openib_get_ip_subnet_id in btl_openib_ip.c
*/
ipaddr =
mca_btl_openib_rdma_get_ipv4addr(openib_btl->device->ib_dev_context,
ipaddr =
mca_btl_openib_rdma_get_ipv4addr(openib_btl->device->ib_dev_context,
openib_btl->port_num);
if (0 == ipaddr) {
BTL_VERBOSE(("*** Could not find IP address for %s:%d -- is there an IP address configured for this device?",
ibv_get_device_name(openib_btl->device->ib_dev),
ibv_get_device_name(openib_btl->device->ib_dev),
openib_btl->port_num));
return OMPI_ERR_NOT_FOUND;
}
@ -1735,16 +1735,16 @@ static int ipaddrcheck(id_context_t *context,
/* Ok, we found the IP address of this device/port. Have we
already seen this IP address/TCP port before? */
for (item = opal_list_get_first(&server_listener_list);
item != opal_list_get_end(&server_listener_list);
for (item = opal_list_get_first(&server_listener_list);
item != opal_list_get_end(&server_listener_list);
item = opal_list_get_next(item)) {
rdmacm_contents_t *contents = (rdmacm_contents_t *)item;
BTL_VERBOSE(("paddr = %x, ipaddr addr = %x",
BTL_VERBOSE(("paddr = %x, ipaddr addr = %x",
contents->ipaddr, ipaddr));
if (contents->ipaddr == ipaddr &&
contents->tcp_port == server_tcp_port) {
str = stringify(ipaddr);
BTL_VERBOSE(("server already listening on %s:%d",
BTL_VERBOSE(("server already listening on %s:%d",
str, server_tcp_port));
free(str);
already_exists = true;
@ -1755,7 +1755,7 @@ static int ipaddrcheck(id_context_t *context,
/* If we haven't seen it before, save it */
if (!already_exists) {
str = stringify(ipaddr);
BTL_VERBOSE(("creating new server to listen on %s:%d",
BTL_VERBOSE(("creating new server to listen on %s:%d",
str, server_tcp_port));
free(str);
server->ipaddr = ipaddr;
@ -1765,8 +1765,8 @@ static int ipaddrcheck(id_context_t *context,
return already_exists ? OMPI_ERROR : OMPI_SUCCESS;
}
static int create_message(rdmacm_contents_t *server,
mca_btl_openib_module_t *openib_btl,
static int create_message(rdmacm_contents_t *server,
mca_btl_openib_module_t *openib_btl,
ompi_btl_openib_connect_base_module_data_t *data)
{
modex_message_t *message;
@ -1780,14 +1780,14 @@ static int create_message(rdmacm_contents_t *server,
return OMPI_ERR_OUT_OF_RESOURCE;
}
message->device_max_qp_rd_atom =
message->device_max_qp_rd_atom =
openib_btl->device->ib_dev_attr.max_qp_rd_atom;
message->device_max_qp_init_rd_atom =
message->device_max_qp_init_rd_atom =
openib_btl->device->ib_dev_attr.max_qp_init_rd_atom;
message->ipaddr = server->ipaddr;
message->tcp_port = server->tcp_port;
OPAL_OUTPUT((-1, "Message IP address is %s, port %d",
OPAL_OUTPUT((-1, "Message IP address is %s, port %d",
a = stringify(message->ipaddr), message->tcp_port));
#if OPAL_ENABLE_DEBUG
free(a);
@ -1798,7 +1798,7 @@ static int create_message(rdmacm_contents_t *server,
return OMPI_SUCCESS;
}
/*
/*
* Runs in main thread
*
* This function determines if the RDMACM is a possible cpc method and
@ -1943,7 +1943,7 @@ out:
} else {
opal_output_verbose(5, mca_btl_base_output,
"openib BTL: rmacm CPC unavailable for use on %s:%d; fatal error %d (%s)",
ibv_get_device_name(openib_btl->device->ib_dev),
ibv_get_device_name(openib_btl->device->ib_dev),
openib_btl->port_num, rc,
opal_strerror(rc));
}
@ -1971,7 +1971,7 @@ static int rdmacm_component_finalize(void)
if (NULL != event_channel) {
#ifndef __WINDOWS__
rc = ompi_btl_openib_fd_unmonitor(event_channel->fd,
rc = ompi_btl_openib_fd_unmonitor(event_channel->fd,
rdmacm_unmonitor, (void*) &barrier);
#endif
if (OMPI_SUCCESS != rc) {
@ -2048,7 +2048,7 @@ static int rdmacm_component_init(void)
ompi_btl_openib_fd_monitor(event_channel->fd, OPAL_EV_READ,
rdmacm_event_dispatch, NULL);
#endif
rdmacm_component_initialized = true;
return OMPI_SUCCESS;
}

Просмотреть файл

@ -2,9 +2,9 @@
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/

Просмотреть файл

@ -36,7 +36,7 @@
#include "orte/util/show_help.h"
static void xoob_component_register(void);
static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
ompi_btl_openib_connect_base_module_t **cpc);
static int xoob_component_finalize(void);
@ -812,7 +812,7 @@ static void xoob_rml_recv_cb(int status, orte_process_name_t* process_name,
requested_lid, message_type);
if ( NULL == ib_endpoint) {
BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_REQUEST."
" Failed to find endpoint with subnet %" PRIx64
" Failed to find endpoint with subnet %" PRIx64
" and LID %d",
rem_info.rem_subnet_id,requested_lid));
mca_btl_openib_endpoint_invoke_error(NULL);
@ -899,7 +899,7 @@ static void xoob_rml_recv_cb(int status, orte_process_name_t* process_name,
/* update ib_addr with remote qp number */
ib_endpoint->ib_addr->remote_xrc_rcv_qp_num =
ib_endpoint->rem_info.rem_qps->rem_qp_num;
BTL_VERBOSE(("rem_info: lid %d, sid %" PRIx64
BTL_VERBOSE(("rem_info: lid %d, sid %" PRIx64
" ep %d %" PRIx64 "\n",
rem_info.rem_lid,
rem_info.rem_subnet_id,
@ -962,7 +962,7 @@ static void xoob_rml_recv_cb(int status, orte_process_name_t* process_name,
*/
/* Query for the XOOB priority - will be highest in XRC case */
static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
ompi_btl_openib_connect_base_module_t **cpc)
{
int rc;
@ -986,7 +986,7 @@ static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
ensure to only post it *once*, because another btl may have
come in before this and already posted it. */
if (!rml_recv_posted) {
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
OMPI_RML_TAG_XOPENIB,
ORTE_RML_PERSISTENT,
xoob_rml_recv_cb,
@ -999,7 +999,7 @@ static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
}
rml_recv_posted = true;
}
(*cpc)->data.cbm_component = &ompi_btl_openib_connect_xoob;
(*cpc)->data.cbm_priority = xoob_priority;
(*cpc)->data.cbm_modex_message = NULL;

Просмотреть файл

@ -3,9 +3,9 @@
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/

Просмотреть файл

@ -2,9 +2,9 @@
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -30,26 +30,26 @@
*
* - a BTL module represents a network port (in the case of the openib
* BTL, a LID)
* - a CPC module represents one way to make connections to a BTL module
* - a CPC module represents one way to make connections to a BTL module
* - hence, a BTL module has potentially multiple CPC modules
* associated with it
* - an endpoint represents a connection between a local BTL module and
* a remote BTL module (in the openib BTL, because of BSRQ, an
* endpoint can contain multiple QPs)
* - an endpoint represents a connection between a local BTL module and
* a remote BTL module (in the openib BTL, because of BSRQ, an
* endpoint can contain multiple QPs)
* - when an endpoint is created, one of the CPC modules associated
* with the local BTL is selected and associated with the endpoint
* with the local BTL is selected and associated with the endpoint
* (obviously, it is a CPC module that is common between the local
* and remote BTL modules)
* - endpoints may be created and destroyed during the MPI job
* - endpoints are created lazily, during the first communication
* between two peers
* - endpoints are destroyed when two MPI processes become
* disconnected (e.g., MPI-2 dynamics or MPI_FINALIZE)
* - hence, BTL modules and CPC modules outlive endpoints.
* Specifically, BTL modules and CPC modules live from MPI_INIT to
* MPI_FINALIZE. endpoints come and go as MPI semantics demand it.
* - therefore, CPC modules need to cache information on endpoints that
* are specific to that connection.
* and remote BTL modules)
* - endpoints may be created and destroyed during the MPI job
* - endpoints are created lazily, during the first communication
* between two peers
* - endpoints are destroyed when two MPI processes become
* disconnected (e.g., MPI-2 dynamics or MPI_FINALIZE)
* - hence, BTL modules and CPC modules outlive endpoints.
* Specifically, BTL modules and CPC modules live from MPI_INIT to
* MPI_FINALIZE. endpoints come and go as MPI semantics demand it.
* - therefore, CPC modules need to cache information on endpoints that
* are specific to that connection.
*
* Component interface:
*
@ -57,7 +57,7 @@
* calls the connect_base_register() function, which scans all
* compiled-in CPC's. If they have component_register() functions,
* they are called (component_register() functions are only allowed to
* register MCA parameters).
* register MCA parameters).
*
* NOTE: The connect_base_register() function will process the
* btl_openib_cpc_include and btl_openib_cpc_exclude MCA parameters
@ -230,7 +230,7 @@ typedef int (*ompi_btl_openib_connect_base_component_init_fn_t)(void);
* - Other OMPI_ERR_* code: an error occurred.
*/
typedef int (*ompi_btl_openib_connect_base_func_component_query_t)
(struct mca_btl_openib_module_t *btl,
(struct mca_btl_openib_module_t *btl,
struct ompi_btl_openib_connect_base_module_t **cpc);
/**

Просмотреть файл

@ -6,7 +6,7 @@
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2006 The Regents of the University of California.
# All rights reserved.
@ -14,9 +14,9 @@
# Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved.
# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
# $COPYRIGHT$
#
#
# Additional copyrights may follow
#
#
# $HEADER$
#
# This is the US/English help file for Open MPI's OpenFabrics support
@ -151,9 +151,9 @@ sent.
This error usually means one of two things:
1. There is something awry within the network fabric itself.
1. There is something awry within the network fabric itself.
2. A bug in Open MPI has caused flow control to malfunction.
#1 is usually more likely. You should note the hosts on which this
error has occurred; it has been observed that rebooting or removing a
particular host from the job can sometimes resolve this issue.
@ -200,7 +200,7 @@ exceeded. "Retry count" is defined in the InfiniBand spec 1.2
This error typically means that there is something awry within the
InfiniBand fabric itself. You should note the hosts on which this
error has occurred; it has been observed that rebooting or removing a
particular host from the job can sometimes resolve this issue.
particular host from the job can sometimes resolve this issue.
Two MCA parameters can be used to control Open MPI's behavior with
respect to the retry count:
@ -280,7 +280,7 @@ Deactivating the OpenFabrics BTL.
Wrong buffer alignment %d configured on host '%s'. Should be bigger
than zero and power of two. Use default %d instead.
#
[of error event]
[of error event]
The OpenFabrics stack has reported a network error event. Open MPI
will try to continue, but your job may end up failing.
@ -591,7 +591,7 @@ conflict:
WARNING: The openib BTL was directed to use "eager RDMA" for short
messages, but the openib BTL was compiled with progress threads
support. Short eager RDMA is not yet supported with progress threads;
its use has been disabled in this job.
its use has been disabled in this job.
This is a warning only; your job will attempt to continue.
#
@ -644,7 +644,7 @@ be able to run successfully.
Local host: %s
Local adapter: %s (vendor 0x%x, part ID %d)
Local queues: %s
Remote host: %s
Remote adapter: (vendor 0x%x, part ID %d)
Remote queues: %s
@ -656,7 +656,7 @@ Such mixed network transport configuration is not supported by Open MPI.
Local host: %s
Local adapter: %s (vendor 0x%x, part ID %d)
Local transport type: %s
Remote host: %s
Remote Adapter: (vendor 0x%x, part ID %d)
Remote transport type: %s

Просмотреть файл

@ -2,7 +2,7 @@
# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2006-2008 Mellanox Technologies. All rights reserved.
# $COPYRIGHT$
#
#
# Additional copyrights may follow
#