
Remove all retransmission code; the IBCM kernel module handles all of
that for us.

This commit was SVN r18432.
Jeff Squyres 2008-05-13 16:10:34 +00:00
parent 74bf1ae25f
commit d8e5608053


@@ -12,10 +12,9 @@
  * TO-DO:
  *
  * - audit control values passed to req_send()
- * - Somehow handle retransmission of RTU; don't know how to do this
- *   yet. :-(
  * - More show_help() throughout
- * - ...?
+ * - error handling in case of broken connection is not good; need to
+ *   notify btl module safely
  */

 /*
@@ -219,23 +218,15 @@
  * start_connect() will have created them already), create all
  * num_qps QPs. Also post receive buffers on all QPs.
  *
- * The IBCM CPC handles the retransmission of request and reply
- * messages, but not RTUs. If we get a REQ_ERROR or REP_ERROR, it
- * means that the IBCM system tried to retransmit a few times and
- * failed. Honestly, we could probably just give up at this point,
- * but one of the point of the IBCM CPC is to work at large scale, so
- * there might be heavy-duty congestion such that the UD messages are
- * actually getting lost. So we'll find the request / reply and
- * re-send it -- we'll do this indefinitely.
+ * We wholly rely on the IBCM system for all retransmissions and
+ * duplicate filtering of IBCM requests, replies, and RTUs. If IBCM
+ * reports a timeout error up to OMPI, we abort the connection. Lists
+ * are maintained of pending IBCM requests and replies solely for
+ * error handling; request/reply timeouts are reported via CM ID. We
+ * can cross-reference this CM ID to the endpoint that it was trying
+ * to connect via these lists.
  *
- * We wholly rely on the IBCM system to retransmit the RTU, however.
- * If the passive side doesn't receive the RTU, it'll automatically
- * retransmit the reply (or raise a REP_ERROR and have us retransmit
- * it). If the active side had already sent the RTU, it'll recognize
- * the incoming reply as a duplicate and therefore retransmit the RTU
- * for us. Nifty.
- *
- * However, there is a race condition: because UD is unordered, the
+ * Note that there is a race condition: because UD is unordered, the
  * first message may arrive on the QP before the RTU has arrived.
  * This will cause an IBV_EVENT_COMM_EST event to be raised, which
  * would then be picked up by the async event handler in the
@@ -387,9 +378,9 @@ typedef struct {
 static OBJ_CLASS_INSTANCE(ibcm_base_cm_id_t, opal_list_item_t, NULL, NULL);

 /*
- * Need to maintain a list of pending CM ID requests (in case we need
- * to retransmit). Need to use the struct name here because it was
- * forward referenced, above.
+ * Need to maintain a list of pending CM ID requests (for error
+ * handling if the requests time out). Need to use the struct name
+ * here because it was forward referenced, above.
  */
 typedef struct ibcm_request_t {
     ibcm_base_cm_id_t super;
@@ -412,9 +403,9 @@ static OBJ_CLASS_INSTANCE(ibcm_request_t, ibcm_base_cm_id_t,
                           ibcm_request_cm_id_constructor, NULL);

 /*
- * Need to maintain a list of pending CM ID replies (in case we need
- * to retransmit). Need to use a struct name here because it was
- * forward referenced, above.
+ * Need to maintain a list of pending CM ID replies (for error
+ * handling if the replies time out). Need to use a struct name here
+ * because it was forward referenced, above.
  */
 typedef struct ibcm_reply_t {
     ibcm_base_cm_id_t super;
@@ -1693,9 +1684,9 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
     opal_mutex_unlock(&ie->ie_lock);

     /* If logic above selected a rejection reason, reject this
-       request. No need to cache this reject message for
-       retransmission; if the same request arrives again later, IBCM
-       will trigger a new event and we'll just reject it again. */
+       request. Note that if the same request arrives again later,
+       IBCM will trigger a new event and we'll just reject it
+       again. */
     if (REJ_MAX != rej_reason) {
         opal_output(-1, "arbitration failed -- reject");
         goto reject;
@@ -1923,9 +1914,7 @@ static int reply_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
         ie->ie_recv_buffers_posted = true;
     }

-    /* Send the RTU -- note that we don't need to queue it up for
-       retransmission (see lengthy explanation at beginning of this
-       file). */
+    /* Send the RTU */
     rtu_data.irtud_reply = reply;
     rtu_data.irtud_qp_index = p->irepd_qp_index;
     if (0 != ib_cm_send_rtu(event->cm_id, &rtu_data, sizeof(rtu_data))) {
@@ -1933,8 +1922,8 @@ static int reply_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
         return OMPI_ERR_IN_ERRNO;
     }

-    /* Remove the pending request so that we don't try to retransmit
-       it */
+    /* Remove the pending request because we won't need to handle
+       errors for it */
     opal_output(-1, "reply received cm id %p -- original cached req %p",
                 (void*)cmh->listen_cm_id, (void*)request);
     opal_list_remove_item(&ibcm_pending_requests, &(request->super.super));
@@ -1967,21 +1956,18 @@ static int ready_to_use_received(ibcm_listen_cm_id_t *h,
     /* Move the QP to RTS */
     if (OMPI_SUCCESS != (rc = qp_to_rts(p->irtud_qp_index,
                                         event->cm_id, endpoint))) {
-        /* JMS */
         opal_output(-1, "ib cm rtu handler: failed move to RTS (index %d)",
                     p->irtud_qp_index);
         return rc;
     }

-    /* Remove the pending reply so that we don't try to retransmit
-       it */
+    /* Remove the pending reply because we won't need to handle errors
+       for it */
     opal_output(-1, "RTU received cm id %p -- original cached reply %p",
                 (void*)event->cm_id, (void*)reply);
     opal_list_remove_item(&ibcm_pending_replies, &(reply->super.super));
     OBJ_RELEASE(reply);

-    /* JMS Send a 0 byte message to the other side to ACK this RTU */
-
     /* Have all the QP's been connected? If so, tell the main BTL
        that we're done. */
     if (0 == --(ie->ie_qps_to_connect)) {
@@ -2042,8 +2028,8 @@ static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
             opal_output(-1, "ibcm rej handler: WRONG_DIRECTION unexpected!");
         } else {
-            /* Remove from the global pending_requests list (because
-               it no longer needs to be retransmitted upon timeout) */
+            /* Remove from the global pending_requests list because we
+               no longer need to handle errors for it */
             opal_output(-1, "reply received cm id %p -- original cached req %p",
                         (void*)cmh->listen_cm_id,
                         (void*)request);
@@ -2077,7 +2063,6 @@ static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)

 static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
 {
-    int rc;
     ibcm_request_t *req;

     opal_output(-1, "ibcm handler: request error!");
@@ -2100,23 +2085,13 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
         return OMPI_ERR_NOT_FOUND;
     }

-    rc = ib_cm_send_req(event->cm_id, &(req->cm_req));
-    if (0 != rc) {
-        opal_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
-                       "ib_cm function error", true,
-                       orte_process_info.nodename,
-                       "ib_cm_send_req", rc, strerror(rc));
-        return OMPI_ERR_UNREACH;
-    }
-    opal_output(-1, "Retransmitted IBCM request (CM ID: %p)",
-                (void*)event->cm_id);
+    /* JMS need to barf this connection request appropriately */

     return OMPI_SUCCESS;
 }

 static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
 {
-    int rc;
     ibcm_reply_t *rep;

     opal_output(-1, "ibcm handler: reply error!");
@@ -2139,17 +2114,7 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
         return OMPI_ERR_NOT_FOUND;
     }

-    rc = ib_cm_send_rep(event->cm_id, &(rep->cm_rep));
-    if (0 != rc) {
-        opal_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
-                       "ib_cm function error", true,
-                       orte_process_info.nodename,
-                       "ib_cm_send_rep", rc, strerror(rc));
-        return OMPI_ERR_UNREACH;
-    }
-    opal_output(-1, "Retransmitted IBCM reply (CM ID: %p)",
-                (void*)event->cm_id);
+    /* JMS need to barf this connection request appropriately */

     return OMPI_SUCCESS;
 }
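
For context on the "JMS need to barf this connection request appropriately"
placeholders left in both error handlers: a minimal sketch of such an abort
path, assuming this file's existing pending-request list and OPAL object
machinery, could look like the following. The helper ibcm_connect_failed()
and the super.endpoint field are invented illustration names, not actual
members of ibcm_base_cm_id_t or functions in this commit.

/* Hypothetical notification hook; the real CPC would need something
   like this to tell the openib BTL module that the endpoint is
   unreachable (see "error handling in case of broken connection" in
   the TO-DO list above). */
static void ibcm_connect_failed(mca_btl_openib_endpoint_t *endpoint);

/* Sketch only -- not part of r18432.  Called once the pending request
   matching the CM ID in the error event has been found. */
static int abort_pending_request(ibcm_request_t *req)
{
    /* IBCM has given up retransmitting this request, and we no longer
       retransmit it ourselves, so drop it from the error-handling list. */
    opal_list_remove_item(&ibcm_pending_requests, &(req->super.super));

    /* Notify the BTL that this endpoint's connection attempt failed so
       that queued sends can be completed in error (hypothetical helper
       and hypothetical endpoint field). */
    ibcm_connect_failed(req->super.endpoint);

    OBJ_RELEASE(req);
    return OMPI_SUCCESS;
}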