1
1

Switch orte_output* -> OPAL_OUTPUT* for two reasons:

1. We can't use orte_output in the CPC service thread because orte is
    not thread safe
 1. Use the macro version so that they're compiled out of production
    builds 

This commit was SVN r18455.
Этот коммит содержится в:
Jeff Squyres 2008-05-19 17:42:51 +00:00
родитель 7154776465
Коммит 01a7f7eeb6

Просмотреть файл

@ -575,8 +575,8 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
iWarp), so we can safely assume that we can use this CPC. */
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
if (IBV_TRANSPORT_IB != btl->hca->ib_dev->transport_type) {
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC only supported on InfiniBand");
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC only supported on InfiniBand"));
rc = OMPI_ERR_NOT_SUPPORTED;
goto error;
}
@ -584,8 +584,8 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
/* IBCM is not supported if we have any XRC QPs */
if (mca_btl_openib_component.num_xrc_qps > 0) {
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC not supported with XRC receive queues");
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC not supported with XRC receive queues"));
rc = OMPI_ERR_NOT_SUPPORTED;
goto error;
}
@ -608,8 +608,8 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
goto error;
}
msg = (modex_msg_t*) (m + 1);
orte_output(-1, "ibcm: created cpc module %p for btl %p",
(void*)m, (void*)btl);
OPAL_OUTPUT((-1, "ibcm: created cpc module %p for btl %p",
(void*)m, (void*)btl));
/* See if we've already for an IB CM listener for this device */
for (item = opal_list_get_first(&ibcm_cm_listeners);
@ -629,8 +629,8 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
cmh = OBJ_NEW(ibcm_listen_cm_id_t);
if (NULL == cmh) {
rc = OMPI_ERR_OUT_OF_RESOURCE;
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC system error (malloc failed)");
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC system error (malloc failed)"));
goto error;
}
@ -646,8 +646,8 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
/* We can't open the device for some reason (can't read,
can't write, doesn't exist, ...etc.); IBCM is not setup
on this node. */
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC failed to open IB CM device: %s", filename);
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC failed to open IB CM device: %s", filename));
free(filename);
rc = OMPI_ERR_NOT_SUPPORTED;
goto error;
@ -663,8 +663,8 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
platform. So print an optional message and return
ERR_NOT_SUPPORTED (i.e., gracefully fail). */
OBJ_RELEASE(cmh);
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC failed to open IB CM device");
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC failed to open IB CM device"));
rc = OMPI_ERR_NOT_SUPPORTED;
goto error;
}
@ -674,8 +674,8 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
0 != ib_cm_listen(cmh->listen_cm_id, ibcm_pid, 0)) {
/* Same rationale as above */
OBJ_RELEASE(cmh);
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC failed to initialize IB CM handles");
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC failed to initialize IB CM handles"));
rc = OMPI_ERR_NOT_SUPPORTED;
goto error;
}
@ -707,19 +707,19 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
different formula). Query for the Nth GID (N = MCA param) on
the port. */
if (ibcm_gid_table_index > btl->ib_port_attr.gid_tbl_len) {
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC desired GID table index (%d) is larger than the actual table size (%d) on device %s",
ibcm_gid_table_index,
btl->ib_port_attr.gid_tbl_len,
ibv_get_device_name(btl->hca->ib_dev));
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC desired GID table index (%d) is larger than the actual table size (%d) on device %s",
ibcm_gid_table_index,
btl->ib_port_attr.gid_tbl_len,
ibv_get_device_name(btl->hca->ib_dev)));
rc = OMPI_ERR_UNREACH;
goto error;
}
rc = ibv_query_gid(btl->hca->ib_dev_context, btl->port_num, ibcm_gid_table_index,
&gid);
if (0 != rc) {
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC system error (ibv_query_gid failed)");
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC system error (ibv_query_gid failed)"));
rc = OMPI_ERR_UNREACH;
goto error;
}
@ -739,22 +739,22 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
/* All done */
*cpc = (ompi_btl_openib_connect_base_module_t *) m;
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC available for use on %s",
ibv_get_device_name(btl->hca->ib_dev));
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC available for use on %s",
ibv_get_device_name(btl->hca->ib_dev)));
return OMPI_SUCCESS;
error:
ibcm_module_finalize(btl, (ompi_btl_openib_connect_base_module_t *) m);
if (OMPI_ERR_NOT_SUPPORTED == rc) {
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: ibcm CPC unavailable for use on %s; skipped",
ibv_get_device_name(btl->hca->ib_dev));
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC unavailable for use on %s; skipped",
ibv_get_device_name(btl->hca->ib_dev)));
} else {
orte_output_verbose(5, mca_btl_base_output,
OPAL_OUTPUT_VERBOSE((5, mca_btl_base_output,
"openib BTL: ibcm CPC unavailable for use on %s; fatal error %d (%s)",
ibv_get_device_name(btl->hca->ib_dev), rc,
opal_strerror(rc));
ibv_get_device_name(btl->hca->ib_dev), rc,
opal_strerror(rc)));
}
return rc;
}
@ -880,7 +880,7 @@ static int fill_path_record(ibcm_module_t *m,
modex_msg_t *local_msg =
(modex_msg_t*) m->cpc.data.cbm_modex_message;
orte_output(-1, "filling path record");
OPAL_OUTPUT((-1, "filling path record"));
/* Global attributes */
path_rec->dgid.global.subnet_prefix =
path_rec->sgid.global.subnet_prefix =
@ -957,31 +957,31 @@ static int fill_path_record(ibcm_module_t *m,
path_rec->rate = IBV_RATE_MAX; break;
}
orte_output(-1, "Got src/dest subnet id: 0x%lx / 0x%lx",
path_rec->sgid.global.subnet_prefix,
path_rec->dgid.global.subnet_prefix);
orte_output(-1, "Got src/dest interface id: 0x%lx / 0x%lx",
path_rec->sgid.global.interface_id,
path_rec->dgid.global.interface_id);
orte_output(-1, "Got src/dest lid: 0x%x / 0x%x",
path_rec->slid, path_rec->dlid);
orte_output(-1, "Got raw_traffic: %d\n", path_rec->raw_traffic);
OPAL_OUTPUT((-1, "Got src/dest subnet id: 0x%lx / 0x%lx",
path_rec->sgid.global.subnet_prefix,
path_rec->dgid.global.subnet_prefix));
OPAL_OUTPUT((-1, "Got src/dest interface id: 0x%lx / 0x%lx",
path_rec->sgid.global.interface_id,
path_rec->dgid.global.interface_id));
OPAL_OUTPUT((-1, "Got src/dest lid: 0x%x / 0x%x",
path_rec->slid, path_rec->dlid));
OPAL_OUTPUT((-1, "Got raw_traffic: %d\n", path_rec->raw_traffic));
orte_output(-1, "Got flow_label: %d\n", path_rec->flow_label);
orte_output(-1, "Got hop_limit: %d\n", path_rec->hop_limit);
orte_output(-1, "Got traffic_class: %d\n", path_rec->traffic_class);
orte_output(-1, "Got reversible: 0x%x\n", path_rec->reversible);
orte_output(-1, "Got numb_path: %d\n", path_rec->numb_path);
orte_output(-1, "Got pkey: 0x%x\n", path_rec->pkey);
OPAL_OUTPUT((-1, "Got flow_label: %d\n", path_rec->flow_label));
OPAL_OUTPUT((-1, "Got hop_limit: %d\n", path_rec->hop_limit));
OPAL_OUTPUT((-1, "Got traffic_class: %d\n", path_rec->traffic_class));
OPAL_OUTPUT((-1, "Got reversible: 0x%x\n", path_rec->reversible));
OPAL_OUTPUT((-1, "Got numb_path: %d\n", path_rec->numb_path));
OPAL_OUTPUT((-1, "Got pkey: 0x%x\n", path_rec->pkey));
orte_output(-1, "Got sl: %d\n", path_rec->sl);
orte_output(-1, "Got mtu_selector: %d\n", path_rec->mtu_selector);
orte_output(-1, "Got mtu: %d\n", path_rec->mtu);
orte_output(-1, "Got rate_selector: %d\n", path_rec->rate_selector);
orte_output(-1, "Got rate: %d\n", path_rec->rate);
orte_output(-1, "Got packet_life_time_selector: %d\n", path_rec->packet_life_time_selector);
orte_output(-1, "Got packet lifetime: 0x%x\n", path_rec->packet_life_time);
orte_output(-1, "Got preference: %d\n", path_rec->preference);
OPAL_OUTPUT((-1, "Got sl: %d\n", path_rec->sl));
OPAL_OUTPUT((-1, "Got mtu_selector: %d\n", path_rec->mtu_selector));
OPAL_OUTPUT((-1, "Got mtu: %d\n", path_rec->mtu));
OPAL_OUTPUT((-1, "Got rate_selector: %d\n", path_rec->rate_selector));
OPAL_OUTPUT((-1, "Got rate: %d\n", path_rec->rate));
OPAL_OUTPUT((-1, "Got packet_life_time_selector: %d\n", path_rec->packet_life_time_selector));
OPAL_OUTPUT((-1, "Got packet lifetime: 0x%x\n", path_rec->packet_life_time));
OPAL_OUTPUT((-1, "Got preference: %d\n", path_rec->preference));
return OMPI_SUCCESS;
}
@ -994,8 +994,8 @@ static int ibcm_endpoint_init(struct mca_btl_base_endpoint_t *endpoint)
return OMPI_ERR_OUT_OF_RESOURCE;
}
orte_output(-1, "ibcm endpoint init for endpoint %p / %p",
(void*)endpoint, (void*)ie);
OPAL_OUTPUT((-1, "ibcm endpoint init for endpoint %p / %p",
(void*)endpoint, (void*)ie));
ie->ie_cpc = endpoint->endpoint_local_cpc;
ie->ie_endpoint = endpoint;
ie->ie_qps_created =
@ -1021,10 +1021,10 @@ static bool i_initiate(ibcm_module_t *m,
uint64_t my_port_guid = ntoh64(m->btl->hca->ib_dev_attr.node_guid) +
m->btl->port_num;
orte_output(-1, "i_initiate: my guid (%0lx), msg guid (%0lx)",
my_port_guid, msg->mm_port_guid);
orte_output(-1, "i_initiate: my pid (%d), msg pid (%d)",
ibcm_pid, msg->mm_service_id);
OPAL_OUTPUT((-1, "i_initiate: my guid (%0lx), msg guid (%0lx)",
my_port_guid, msg->mm_port_guid));
OPAL_OUTPUT((-1, "i_initiate: my pid (%d), msg pid (%d)",
ibcm_pid, msg->mm_service_id));
return
(my_port_guid == msg->mm_port_guid &&
@ -1042,7 +1042,7 @@ static ibcm_request_t *alloc_request(ibcm_module_t *m, modex_msg_t *msg,
{
struct ib_cm_req_param *cm_req;
ibcm_request_t *req = OBJ_NEW(ibcm_request_t);
orte_output(-1, "allocated cached req id: %p", (void*)req);
OPAL_OUTPUT((-1, "allocated cached req id: %p", (void*)req));
if (NULL == req) {
return NULL;
@ -1052,7 +1052,7 @@ static ibcm_request_t *alloc_request(ibcm_module_t *m, modex_msg_t *msg,
if (0 != ib_cm_create_id(m->cmh->cm_device,
&(req->super.cm_id),
NULL)) {
orte_output(-1, "ib cm: failed to create active device id");
OPAL_OUTPUT((-1, "ib cm: failed to create active device id"));
OBJ_RELEASE(req);
return NULL;
}
@ -1094,7 +1094,7 @@ static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
struct ibv_sa_path_rec path_rec;
bool do_initiate;
orte_output(-1,"ibcm start connect on endpoint %p", (void*)endpoint);
OPAL_OUTPUT((-1,"ibcm start connect on endpoint %p", (void*)endpoint));
/* Has an incoming request already initiated the connect sequence
on this endpoint? If so, just exit successfully -- the
@ -1115,7 +1115,7 @@ static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
/* Fill in the path record for this peer */
if (OMPI_SUCCESS != fill_path_record(m, endpoint, &path_rec)) {
orte_output(-1, "================ start connect failed!!!");
OPAL_OUTPUT((-1, "================ start connect failed!!!"));
rc = OMPI_ERR_NOT_FOUND;
goto err;
}
@ -1159,8 +1159,8 @@ static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
ie->ie_cm_id_cache = calloc(ie->ie_cm_id_cache_size,
sizeof(ibcm_base_cm_id_t*));
if (NULL == ie->ie_cm_id_cache) {
orte_output(-1, "ib cm: failed to malloc %d active device ids",
num_ids);
OPAL_OUTPUT((-1, "ib cm: failed to malloc %d active device ids",
num_ids));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto err;
}
@ -1192,16 +1192,16 @@ static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
cm_req->srq = BTL_OPENIB_QP_TYPE_SRQ(i);
cm_req->qp_num = endpoint->qps[i].qp->lcl_qp->qp_num;
cm_req->starting_psn = endpoint->qps[i].qp->lcl_psn;
orte_output(-1, "ibcm: sending my qpn %d, psn %d\n",
cm_req->qp_num, cm_req->starting_psn);
OPAL_OUTPUT((-1, "ibcm: sending my qpn %d, psn %d\n",
cm_req->qp_num, cm_req->starting_psn));
req->private_data.ireqd_request = req;
req->private_data.ireqd_qp_index = i;
/* Send the request */
orte_output(-1, "ibcm sending connect request %d of %d (id %p)",
OPAL_OUTPUT((-1, "ibcm sending connect request %d of %d (id %p)",
i, mca_btl_openib_component.num_qps,
(void*)req->super.cm_id);
(void*)req->super.cm_id));
if (0 != ib_cm_send_req(req->super.cm_id, cm_req)) {
rc = OMPI_ERR_UNREACH;
goto err;
@ -1231,9 +1231,9 @@ static int ibcm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
cm_req->srq = 0;
cm_req->qp_num = ie->ie_bogus_qp->qp_num;
cm_req->starting_psn = 0;
orte_output(-1, "ibcm: sending BOGUS qpn %d, psn %d (id %p)",
OPAL_OUTPUT((-1, "ibcm: sending BOGUS qpn %d, psn %d (id %p)",
cm_req->qp_num, cm_req->starting_psn,
(void*)req->super.cm_id);
(void*)req->super.cm_id));
req->private_data.ireqd_request = req;
req->private_data.ireqd_qp_index = 0;
@ -1354,17 +1354,17 @@ static int ibcm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
{
ibcm_endpoint_t *ie =
(ibcm_endpoint_t *) endpoint->endpoint_local_cpc_data;
orte_output(-1, "ibcm endpoint finalize: %p", (void*)endpoint);
OPAL_OUTPUT((-1, "ibcm endpoint finalize: %p", (void*)endpoint));
/* Free the stuff we allocated in ibcm_module_init */
if (NULL != ie) {
int i;
for (i = 0; i < ie->ie_cm_id_cache_size; ++i) {
if (NULL != ie->ie_cm_id_cache[i]) {
orte_output(-1, "Endpoint %p (%p), destroying ID %d (%p)\n",
OPAL_OUTPUT((-1, "Endpoint %p (%p), destroying ID %d (%p)\n",
(void*)endpoint,
(void*)ie,
i, (void*)&(ie->ie_cm_id_cache[i]->cm_id));
i, (void*)&(ie->ie_cm_id_cache[i]->cm_id)));
ib_cm_destroy_id(ie->ie_cm_id_cache[i]->cm_id);
OBJ_RELEASE(ie->ie_cm_id_cache[i]);
}
@ -1377,7 +1377,7 @@ static int ibcm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
endpoint->endpoint_local_cpc_data = NULL;
}
orte_output(-1, "ibcm endpoint finalize done: %p", (void*)endpoint);
OPAL_OUTPUT((-1, "ibcm endpoint finalize done: %p", (void*)endpoint));
return OMPI_SUCCESS;
}
@ -1428,13 +1428,13 @@ static int qp_to_rtr(int qp_index, struct ib_cm_id *cm_id,
(mtu == IBV_MTU_4096) ? "4096" :
"unknown (!)"));
}
orte_output(-1, "ibm cm handler: connect qp set to IBV value %d (%s bytes)", mtu,
OPAL_OUTPUT((-1, "ibm cm handler: connect qp set to IBV value %d (%s bytes)", mtu,
(mtu == IBV_MTU_256) ? "256" :
(mtu == IBV_MTU_512) ? "512" :
(mtu == IBV_MTU_1024) ? "1024" :
(mtu == IBV_MTU_2048) ? "2048" :
(mtu == IBV_MTU_4096) ? "4096" :
"unknown (!)");
"unknown (!)"));
/* Move the QP into the INIT state */
memset(&attr, 0, sizeof(attr));
@ -1463,7 +1463,7 @@ static int qp_to_rtr(int qp_index, struct ib_cm_id *cm_id,
/* Setup attributes */
attr.path_mtu = mtu;
attr.rq_psn = endpoint->qps[qp_index].qp->lcl_psn;
orte_output(-1, "ib cm qp connect: setting rq psn: %d", attr.rq_psn);
OPAL_OUTPUT((-1, "ib cm qp connect: setting rq psn: %d", attr.rq_psn));
/* IBM CM does not set these values for us */
attr.max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
attr.min_rnr_timer = mca_btl_openib_component.ib_min_rnr_timer;
@ -1507,7 +1507,7 @@ static int qp_to_rts(int qp_index, struct ib_cm_id *cm_id,
}
/* All done */
orte_output(-1, "successfully set RTS");
OPAL_OUTPUT((-1, "successfully set RTS"));
return OMPI_SUCCESS;
}
@ -1520,7 +1520,7 @@ static void *callback_set_endpoint_connecting(void *context)
mca_btl_openib_endpoint_t *endpoint =
(mca_btl_openib_endpoint_t *) context;
orte_output(-1, "ibcm scheduled callback: setting endpoint to CONNECTING");
OPAL_OUTPUT((-1, "ibcm scheduled callback: setting endpoint to CONNECTING"));
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
return NULL;
@ -1535,15 +1535,15 @@ static void *callback_start_connect(void *context)
callback_start_connect_data_t *cbdata =
(callback_start_connect_data_t *) context;
orte_output(-1, "ibcm scheduled callback: calling start_connect()");
orte_output(-1, "ibcm scheduled callback: cbdata %p",
(void*)cbdata);
orte_output(-1, "ibcm scheduled callback: endpoint %p",
(void*)cbdata->cscd_endpoint);
orte_output(-1, "ibcm scheduled callback: ie %p",
(void*)cbdata->cscd_endpoint->endpoint_local_cpc_data);
orte_output(-1, "ibcm scheduled callback: msg %p",
(void*)cbdata->cscd_endpoint->endpoint_remote_cpc_data->cbm_modex_message);
OPAL_OUTPUT((-1, "ibcm scheduled callback: calling start_connect()"));
OPAL_OUTPUT((-1, "ibcm scheduled callback: cbdata %p",
(void*)cbdata));
OPAL_OUTPUT((-1, "ibcm scheduled callback: endpoint %p",
(void*)cbdata->cscd_endpoint));
OPAL_OUTPUT((-1, "ibcm scheduled callback: ie %p",
(void*)cbdata->cscd_endpoint->endpoint_local_cpc_data));
OPAL_OUTPUT((-1, "ibcm scheduled callback: msg %p",
(void*)cbdata->cscd_endpoint->endpoint_remote_cpc_data->cbm_modex_message));
ibcm_module_start_connect(cbdata->cscd_cpc, cbdata->cscd_endpoint);
free(cbdata);
@ -1572,19 +1572,19 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
ibcm_module_t *m;
ibcm_reply_t *rep;
orte_output(-1, "ibcm req handler: remote qp index %d, remote guid %lx, remote qkey %u, remote qpn %d, remote psn %d",
OPAL_OUTPUT((-1, "ibcm req handler: remote qp index %d, remote guid %lx, remote qkey %u, remote qpn %d, remote psn %d",
qp_index,
ntoh64(req->primary_path->dgid.global.interface_id),
req->remote_qkey, req->remote_qpn,
req->starting_psn);
req->starting_psn));
/* Find the ibcm module for this request: remember that IB CM
events come in per *device*, not per *port*. So we just got a
device event, and have to find the ibcm_module_t (i.e., local
port/openib BTL module ) that corresponds to it. */
orte_output(-1, "looking for ibcm module -- source port guid: 0x%lx (%p)",
OPAL_OUTPUT((-1, "looking for ibcm module -- source port guid: 0x%lx (%p)",
ntoh64(req->primary_path->sgid.global.interface_id),
(void*)cmh);
(void*)cmh));
for (item = opal_list_get_first(&(cmh->ibcm_modules));
item != opal_list_get_end(&(cmh->ibcm_modules));
item = opal_list_get_next(item)) {
@ -1592,8 +1592,8 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
imli = (ibcm_module_list_item_t*) item;
m = imli->ibcm_module;
msg = imli->ibcm_module->cpc.data.cbm_modex_message;
orte_output(-1, "comparing ibcm module port guid: 0x%lx",
msg->mm_port_guid);
OPAL_OUTPUT((-1, "comparing ibcm module port guid: 0x%lx",
msg->mm_port_guid));
if (msg->mm_port_guid ==
ntoh64(req->primary_path->sgid.global.interface_id)) {
break;
@ -1612,37 +1612,37 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
ib_proc != (mca_btl_openib_proc_t*)
opal_list_get_end(&mca_btl_openib_component.ib_procs);
ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
orte_output(-1, "ibcm req: checking ib_proc %p", (void*)ib_proc);
OPAL_OUTPUT((-1, "ibcm req: checking ib_proc %p", (void*)ib_proc));
/* Now cycle through all the endpoints on that proc */
for (i = 0; !found && i < (int) ib_proc->proc_endpoint_count; ++i) {
orte_output(-1, "ibcm req: checking endpoint %d of %d (ep %p, cpc data %p)",
OPAL_OUTPUT((-1, "ibcm req: checking endpoint %d of %d (ep %p, cpc data %p)",
i, (int) ib_proc->proc_endpoint_count,
(void*)ib_proc->proc_endpoints[i],
(void*)ib_proc->proc_endpoints[i]->endpoint_remote_cpc_data);
(void*)ib_proc->proc_endpoints[i]->endpoint_remote_cpc_data));
if (NULL == ib_proc->proc_endpoints[i]->endpoint_remote_cpc_data) {
orte_output(-1, "NULL remote cpc data!");
OPAL_OUTPUT((-1, "NULL remote cpc data!"));
}
msg = ib_proc->proc_endpoints[i]->endpoint_remote_cpc_data->cbm_modex_message;
orte_output(-1, "ibcm req: my guid 0x%lx, remote guid 0x%lx",
OPAL_OUTPUT((-1, "ibcm req: my guid 0x%lx, remote guid 0x%lx",
msg->mm_port_guid,
ntoh64(req->primary_path->dgid.global.interface_id));
ntoh64(req->primary_path->dgid.global.interface_id)));
if (msg->mm_port_guid ==
ntoh64(req->primary_path->dgid.global.interface_id) &&
msg->mm_service_id == active_private_data->ireqd_pid &&
msg->mm_port_num == req->port) {
orte_output(-1, "*** found matching endpoint!!!");
OPAL_OUTPUT((-1, "*** found matching endpoint!!!"));
endpoint = ib_proc->proc_endpoints[i];
found = true;
}
}
}
if (!found) {
orte_output(-1, "ibcm req: could not find match for calling endpoint!");
OPAL_OUTPUT((-1, "ibcm req: could not find match for calling endpoint!"));
rc = OMPI_ERR_NOT_FOUND;
rej_reason = REJ_PEER_NOT_FOUND;
goto reject;
}
orte_output(-1, "ibcm req: Found endpoint %p", (void*)endpoint);
OPAL_OUTPUT((-1, "ibcm req: Found endpoint %p", (void*)endpoint));
/* Get our CPC-local data on the endpoint */
ie = (ibcm_endpoint_t*) endpoint->endpoint_local_cpc_data;
@ -1659,25 +1659,25 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
debugging. */
opal_mutex_lock(&ie->ie_lock);
if (do_initiate) {
orte_output(-1, "ibcm request: request came from wrong direction");
OPAL_OUTPUT((-1, "ibcm request: request came from wrong direction"));
rc = OMPI_SUCCESS;
rej_reason = REJ_WRONG_DIRECTION;
} else if (ie->ie_connection_flags & CFLAGS_COMPLETED) {
orte_output(-1, "ibcm request: all QPs already connected");
OPAL_OUTPUT((-1, "ibcm request: all QPs already connected"));
rej_reason = REJ_ALREADY_CONNECTED;
rc = OMPI_SUCCESS;
} else if (ie->ie_connection_flags & CFLAGS_ONGOING) {
/* See if the request for this QP already arrived */
if (ie->ie_qps_created &&
IBV_QPS_RESET != endpoint->qps[qp_index].qp->lcl_qp->state) {
orte_output(-1, "ibcm request: this QP (%d) already connected",
qp_index);
OPAL_OUTPUT((-1, "ibcm request: this QP (%d) already connected",
qp_index));
rej_reason = REJ_QP_ALREADY_CONNECTED;
rc = OMPI_SUCCESS;
}
} else {
/* this is the first activity -- accept */
orte_output(-1, "ibcm request: first initiation request");
OPAL_OUTPUT((-1, "ibcm request: first initiation request"));
ie->ie_connection_flags |= CFLAGS_ONGOING;
}
opal_mutex_unlock(&ie->ie_lock);
@ -1687,11 +1687,11 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
IBCM will trigger a new event and we'll just reject it
again. */
if (REJ_MAX != rej_reason) {
orte_output(-1, "arbitrartion failed -- reject");
OPAL_OUTPUT((-1, "arbitrartion failed -- reject"));
goto reject;
}
orte_output(-1, "ibcm req handler: initiation arbitration successful -- proceeding");
OPAL_OUTPUT((-1, "ibcm req handler: initiation arbitration successful -- proceeding"));
/* If this is the first request we have received for this
endpoint, then make *all* the QP's (because we analyze all the
@ -1704,11 +1704,11 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
endpoint);
if (OMPI_SUCCESS != (rc = qp_create_all(endpoint, m))) {
rej_reason = REJ_PASSIVE_SIDE_ERROR;
orte_output(-1, "qp_create_all failed -- reject");
OPAL_OUTPUT((-1, "qp_create_all failed -- reject"));
goto reject;
}
ie->ie_qps_created = true;
orte_output(-1, "ibcm request: created qp's");
OPAL_OUTPUT((-1, "ibcm request: created qp's"));
}
/* Save these numbers on the endpoint for reference. Other values
@ -1722,7 +1722,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
/* Connect this QP to the peer */
if (OMPI_SUCCESS != (rc = qp_to_rtr(qp_index,
event->cm_id, endpoint))) {
orte_output(-1, "ib cm req handler: failed to connect qp");
OPAL_OUTPUT((-1, "ib cm req handler: failed to connect qp"));
rej_reason = REJ_PASSIVE_SIDE_ERROR;
goto reject;
}
@ -1736,7 +1736,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
if (OMPI_SUCCESS !=
(rc = mca_btl_openib_endpoint_post_recvs(endpoint))) {
/* JMS */
orte_output(-1, "ib cm req handler: failed to post recv buffers");
OPAL_OUTPUT((-1, "ib cm req handler: failed to post recv buffers"));
rej_reason = REJ_PASSIVE_SIDE_ERROR;
goto reject;
}
@ -1750,7 +1750,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
sizeof(ibcm_base_cm_id_t*));
if (NULL == ie->ie_cm_id_cache) {
rej_reason = REJ_PASSIVE_SIDE_ERROR;
orte_output(-1, "malloc failed -- reject");
OPAL_OUTPUT((-1, "malloc failed -- reject"));
goto reject;
}
}
@ -1758,7 +1758,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
/* Save the CM ID on the endpoint for destruction later */
ie->ie_cm_id_cache[qp_index] = OBJ_NEW(ibcm_base_cm_id_t);
if (NULL == ie->ie_cm_id_cache[qp_index]) {
orte_output(-1, "ib cm req handler: malloc failed");
OPAL_OUTPUT((-1, "ib cm req handler: malloc failed"));
rej_reason = REJ_PASSIVE_SIDE_ERROR;
goto reject;
}
@ -1769,7 +1769,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
if (NULL == req) {
rej_reason = REJ_PASSIVE_SIDE_ERROR;
rc = OMPI_ERR_OUT_OF_RESOURCE;
orte_output(-1, "OBJ_NEW failed -- reject");
OPAL_OUTPUT((-1, "OBJ_NEW failed -- reject"));
goto reject;
}
rep->super.cm_id = event->cm_id;
@ -1778,8 +1778,8 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
rep->cm_rep.qp_num = endpoint->qps[qp_index].qp->lcl_qp->qp_num;
rep->cm_rep.srq = BTL_OPENIB_QP_TYPE_SRQ(qp_index);
rep->cm_rep.starting_psn = endpoint->qps[qp_index].qp->lcl_psn;
orte_output(-1, "ib cm reply: setting reply psn %d\n",
rep->cm_rep.starting_psn);
OPAL_OUTPUT((-1, "ib cm reply: setting reply psn %d\n",
rep->cm_rep.starting_psn));
rep->cm_rep.responder_resources = req->responder_resources;
rep->cm_rep.initiator_depth = req->initiator_depth;
rep->cm_rep.target_ack_delay = 20;
@ -1793,22 +1793,22 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
if (0 != (rc = ib_cm_send_rep(event->cm_id, &(rep->cm_rep)))) {
/* JMS */
orte_output(-1, "ibcm req handler: failed to send reply for qp index %d",
qp_index);
OPAL_OUTPUT((-1, "ibcm req handler: failed to send reply for qp index %d",
qp_index));
OBJ_RELEASE(rep);
rej_reason = REJ_PASSIVE_SIDE_ERROR;
orte_output(-1, "failed to send request -- reject");
OPAL_OUTPUT((-1, "failed to send request -- reject"));
goto reject;
}
opal_list_append(&ibcm_pending_replies, &(rep->super.super));
orte_output(-1, "ibcm req handler: sent reply for qp index %d",
qp_index);
OPAL_OUTPUT((-1, "ibcm req handler: sent reply for qp index %d",
qp_index));
return OMPI_SUCCESS;
reject:
/* Reject the request */
orte_output(-1, "rejecting request");
OPAL_OUTPUT((-1, "rejecting request"));
ib_cm_send_rej(event->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
&rej_reason, sizeof(rej_reason),
event->private_data, sizeof(ibcm_req_data_t));
@ -1836,9 +1836,9 @@ static void *callback_set_endpoint_connected(void *context)
{
mca_btl_openib_endpoint_t *endpoint = (mca_btl_openib_endpoint_t*) context;
orte_output(-1, "ibcm: calling endpoint_connected");
OPAL_OUTPUT((-1, "ibcm: calling endpoint_connected"));
mca_btl_openib_endpoint_connected(endpoint);
orte_output(-1, "ibcm: *** CONNECTED endpoint_connected done!");
OPAL_OUTPUT((-1, "ibcm: *** CONNECTED endpoint_connected done!"));
return NULL;
}
@ -1877,8 +1877,8 @@ static int reply_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
ibcm_endpoint_t *ie;
ibcm_rtu_data_t rtu_data;
orte_output(-1, "ibcm handler: got reply! (qp index %d) endpoint: %p",
p->irepd_qp_index, (void*) endpoint);
OPAL_OUTPUT((-1, "ibcm handler: got reply! (qp index %d) endpoint: %p",
p->irepd_qp_index, (void*) endpoint));
ie = (ibcm_endpoint_t*) endpoint->endpoint_local_cpc_data;
endpoint->rem_info.rem_qps[p->irepd_qp_index].rem_psn =
@ -1888,13 +1888,13 @@ static int reply_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
/* Move the QP to RTR and RTS */
if (OMPI_SUCCESS != (rc = qp_to_rtr(p->irepd_qp_index,
event->cm_id, endpoint))) {
orte_output(-1, "ib cm req handler: failed move to RTR");
OPAL_OUTPUT((-1, "ib cm req handler: failed move to RTR"));
return rc;
}
if (OMPI_SUCCESS != (rc = qp_to_rts(p->irepd_qp_index,
event->cm_id, endpoint))) {
orte_output(-1, "ib cm req handler: failed move to RTS");
OPAL_OUTPUT((-1, "ib cm req handler: failed move to RTS"));
return rc;
}
@ -1907,7 +1907,7 @@ static int reply_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
if (!ie->ie_recv_buffers_posted) {
if (OMPI_SUCCESS !=
(rc = mca_btl_openib_endpoint_post_recvs(endpoint))) {
orte_output(-1, "ib cm: failed to post recv buffers");
OPAL_OUTPUT((-1, "ib cm: failed to post recv buffers"));
return rc;
}
ie->ie_recv_buffers_posted = true;
@ -1917,21 +1917,21 @@ static int reply_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
rtu_data.irtud_reply = reply;
rtu_data.irtud_qp_index = p->irepd_qp_index;
if (0 != ib_cm_send_rtu(event->cm_id, &rtu_data, sizeof(rtu_data))) {
orte_output(-1, "ib cm rep handler: failed to send RTU");
OPAL_OUTPUT((-1, "ib cm rep handler: failed to send RTU"));
return OMPI_ERR_IN_ERRNO;
}
/* Remove the pending request because we won't need to handle
errors for it */
orte_output(-1, "reply received cm id %p -- original cached req %p",
(void*)cmh->listen_cm_id, (void*)request);
OPAL_OUTPUT((-1, "reply received cm id %p -- original cached req %p",
(void*)cmh->listen_cm_id, (void*)request));
opal_list_remove_item(&ibcm_pending_requests, &(request->super.super));
OBJ_RELEASE(request);
/* Have all the QP's been connected? If so, tell the main BTL
that we're done. */
if (0 == --(ie->ie_qps_to_connect)) {
orte_output(-1, "ib cm rep handler: REPLY telling main BTL we're connected");
OPAL_OUTPUT((-1, "ib cm rep handler: REPLY telling main BTL we're connected"));
ompi_btl_openib_fd_schedule(callback_set_endpoint_connected, endpoint);
}
@ -1950,31 +1950,31 @@ static int ready_to_use_received(ibcm_listen_cm_id_t *h,
mca_btl_openib_endpoint_t *endpoint = reply->endpoint;
ibcm_endpoint_t *ie = (ibcm_endpoint_t*) endpoint->endpoint_local_cpc_data;
orte_output(-1, "ibcm handler: got RTU! (index %d)", p->irtud_qp_index);
OPAL_OUTPUT((-1, "ibcm handler: got RTU! (index %d)", p->irtud_qp_index));
/* Move the QP to RTS */
if (OMPI_SUCCESS != (rc = qp_to_rts(p->irtud_qp_index,
event->cm_id, endpoint))) {
orte_output(-1, "ib cm rtu handler: failed move to RTS (index %d)",
p->irtud_qp_index);
OPAL_OUTPUT((-1, "ib cm rtu handler: failed move to RTS (index %d)",
p->irtud_qp_index));
return rc;
}
/* Remove the pending reply because we won't need to handle errors
for it */
orte_output(-1, "RTU received cm id %p -- original cached reply %p",
(void*)event->cm_id, (void*)reply);
OPAL_OUTPUT((-1, "RTU received cm id %p -- original cached reply %p",
(void*)event->cm_id, (void*)reply));
opal_list_remove_item(&ibcm_pending_replies, &(reply->super.super));
OBJ_RELEASE(reply);
/* Have all the QP's been connected? If so, tell the main BTL
that we're done. */
if (0 == --(ie->ie_qps_to_connect)) {
orte_output(-1, "ib cm rtu handler: RTU telling main BTL we're connected");
OPAL_OUTPUT((-1, "ib cm rtu handler: RTU telling main BTL we're connected"));
ompi_btl_openib_fd_schedule(callback_set_endpoint_connected, endpoint);
}
orte_output(-1, "ib cm rtu handler: all done");
OPAL_OUTPUT((-1, "ib cm rtu handler: all done"));
return OMPI_SUCCESS;
}
@ -1982,7 +1982,7 @@ static int ready_to_use_received(ibcm_listen_cm_id_t *h,
static int disconnect_request_received(ibcm_listen_cm_id_t *cmh,
struct ib_cm_event *event)
{
orte_output(-1, "ibcm handler: disconnect request received");
OPAL_OUTPUT((-1, "ibcm handler: disconnect request received"));
return OMPI_SUCCESS;
}
@ -1990,7 +1990,7 @@ static int disconnect_request_received(ibcm_listen_cm_id_t *cmh,
static int disconnect_reply_received(ibcm_listen_cm_id_t *cmd,
struct ib_cm_event *event)
{
orte_output(-1, "ibcm handler: disconnect reply received");
OPAL_OUTPUT((-1, "ibcm handler: disconnect reply received"));
#if 0
ib_cm_send_drep(event->cm_id, NULL, 0);
#endif
@ -2004,8 +2004,8 @@ static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
ibcm_reject_reason_t *rej_reason =
(ibcm_reject_reason_t *) event->param.rej_rcvd.ari;
orte_output(-1, "ibcm handler: reject received: reason %d, official reason: %d",
reason, *rej_reason);
OPAL_OUTPUT((-1, "ibcm handler: reject received: reason %d, official reason: %d",
reason, *rej_reason));
/* Determine if we expected this reject or not */
@ -2018,33 +2018,33 @@ static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
ibcm_endpoint_t *ie = (ibcm_endpoint_t*)
endpoint->endpoint_local_cpc_data;
orte_output(-1, "ibcm rej handler: got WRONG_DIRECTION reject, endpoint: %p, pid %d, ep_index %d, qp_index %d",
OPAL_OUTPUT((-1, "ibcm rej handler: got WRONG_DIRECTION reject, endpoint: %p, pid %d, ep_index %d, qp_index %d",
(void*)my_private_data->ireqd_request->endpoint,
my_private_data->ireqd_pid,
my_private_data->ireqd_ep_index,
my_private_data->ireqd_qp_index);
my_private_data->ireqd_qp_index));
if (NULL == ie->ie_bogus_qp) {
orte_output(-1, "ibcm rej handler: WRONG_DIRECTION unexpected!");
OPAL_OUTPUT((-1, "ibcm rej handler: WRONG_DIRECTION unexpected!"));
} else {
/* Remove from the global pending_requests list because we
no longer need to handle errors for it */
orte_output(-1, "reply received cm id %p -- original cached req %p",
OPAL_OUTPUT((-1, "reply received cm id %p -- original cached req %p",
(void*)cmh->listen_cm_id,
(void*)request);
(void*)request));
opal_list_remove_item(&ibcm_pending_requests,
&(request->super.super));
/* We ack the event and then destroy the CM ID (you *must*
ACK it first -- the destroy will block until all
outstand events on this ID are complete) */
orte_output(-1, "ibcm rej handler: destroying bogus CM ID: %p",
(void*)request->super.cm_id);
OPAL_OUTPUT((-1, "ibcm rej handler: destroying bogus CM ID: %p",
(void*)request->super.cm_id));
ib_cm_ack_event(event);
ib_cm_destroy_id(request->super.cm_id);
/* Destroy the QP */
orte_output(-1, "ibcm rej handler: destroying bogus qp");
OPAL_OUTPUT((-1, "ibcm rej handler: destroying bogus qp"));
ibv_destroy_qp(ie->ie_bogus_qp);
ie->ie_bogus_qp = NULL;
@ -2055,15 +2055,15 @@ static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
return OMPI_SUCCESS;
}
orte_output(-1, "ibcm rej handler: got unexpected reject type: %d",
reason);
OPAL_OUTPUT((-1, "ibcm rej handler: got unexpected reject type: %d",
reason));
return OMPI_ERR_NOT_FOUND;
}
static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
{
ibcm_request_t *req;
orte_output(-1, "ibcm handler: request error!");
OPAL_OUTPUT((-1, "ibcm handler: request error!"));
if (IBV_WC_RESP_TIMEOUT_ERR != event->param.send_status) {
orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
@ -2073,8 +2073,8 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
return OMPI_ERROR;
}
orte_output(-1, "Got timeout in IBCM request (CM ID: %p)",
(void*)event->cm_id);
OPAL_OUTPUT((-1, "Got timeout in IBCM request (CM ID: %p)",
(void*)event->cm_id));
req = (ibcm_request_t*) find_cm_id(event->cm_id,
&ibcm_pending_requests);
if (NULL == req) {
@ -2092,7 +2092,7 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
{
ibcm_reply_t *rep;
orte_output(-1, "ibcm handler: reply error!");
OPAL_OUTPUT((-1, "ibcm handler: reply error!"));
if (IBV_WC_RESP_TIMEOUT_ERR != event->param.send_status) {
orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
@ -2102,8 +2102,8 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
return OMPI_ERROR;
}
orte_output(-1, "Got timeout in IBCM reply (id: %p) -- aborting because resend is not written yet...",
(void*)event->cm_id);
OPAL_OUTPUT((-1, "Got timeout in IBCM reply (id: %p) -- aborting because resend is not written yet...",
(void*)event->cm_id));
rep = (ibcm_reply_t*) find_cm_id(event->cm_id,
&ibcm_pending_replies);
if (NULL == rep) {
@ -2121,15 +2121,15 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
static int disconnect_request_error(ibcm_listen_cm_id_t *cmh,
struct ib_cm_event *e)
{
orte_output(-1, "ibcm handler: disconnect request error!");
OPAL_OUTPUT((-1, "ibcm handler: disconnect request error!"));
return OMPI_SUCCESS;
}
static int unhandled_event(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *e)
{
orte_output(-1, "ibcm handler: unhandled event error (%p, %d)",
(void*) e, e->event);
OPAL_OUTPUT((-1, "ibcm handler: unhandled event error (%p, %d)",
(void*) e, e->event));
return OMPI_ERR_NOT_FOUND;
}
@ -2203,7 +2203,7 @@ static void *ibcm_event_dispatch(int fd, int flags, void *context)
}
if (OMPI_SUCCESS != rc) {
orte_output(-1, "An error occurred handling an IBCM event. Bad things are likely to happen.");
OPAL_OUTPUT((-1, "An error occurred handling an IBCM event. Bad things are likely to happen."));
}
}