Remove all retransmission code; the IBCM kernel module handles all of
that for us. This commit was SVN r18432.
Этот коммит содержится в:
родитель
74bf1ae25f
Коммит
d8e5608053
@ -12,10 +12,9 @@
|
|||||||
* TO-DO:
|
* TO-DO:
|
||||||
*
|
*
|
||||||
* - audit control values passed to req_send()
|
* - audit control values passed to req_send()
|
||||||
* - Somehow handle retransmission of RTU; don't know how to do this
|
|
||||||
* yet. :-(
|
|
||||||
* - More show_help() throughout
|
* - More show_help() throughout
|
||||||
* - ...?
|
* - error handling in case of broken connection is not good; need to
|
||||||
|
* notify btl module safely
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -219,23 +218,15 @@
|
|||||||
* start_connect() will have created them already), create all
|
* start_connect() will have created them already), create all
|
||||||
* num_qps QPs. Also post receive buffers on all QPs.
|
* num_qps QPs. Also post receive buffers on all QPs.
|
||||||
*
|
*
|
||||||
* The IBCM CPC handles the retransmission of request and reply
|
* We wholly rely on the IBCM system for all retransmissions and
|
||||||
* messages, but not RTUs. If we get a REQ_ERROR or REP_ERROR, it
|
* duplicate filtering of IBCM requests, replies, and RTUs. If IBCM
|
||||||
* means that the IBCM system tried to retransmit a few times and
|
* reports a timeout error up to OMPI, we abort the connection. Lists
|
||||||
* failed. Honestly, we could probably just give up at this point,
|
* are maintained of pending IBCM requests and replies solely for
|
||||||
* but one of the points of the IBCM CPC is to work at large scale, so
|
* error handling; request/reply timeouts are reported via CM ID. We
|
||||||
* there might be heavy-duty congestion such that the UD messages are
|
* can cross-reference this CM ID to the endpoint that it was trying
|
||||||
* actually getting lost. So we'll find the request / reply and
|
* to connect via these lists.
|
||||||
* re-send it -- we'll do this indefinitely.
|
|
||||||
*
|
*
|
||||||
* We wholly rely on the IBCM system to retransmit the RTU, however.
|
* Note that there is a race condition: because UD is unordered, the
|
||||||
* If the passive side doesn't receive the RTU, it'll automatically
|
|
||||||
* retransmit the reply (or raise a REP_ERROR and have us retransmit
|
|
||||||
* it). If the active side had already sent the RTU, it'll recognize
|
|
||||||
* the incoming reply as a duplicate and therefore retransmit the RTU
|
|
||||||
* for us. Nifty.
|
|
||||||
*
|
|
||||||
* However, there is a race condition: because UD is unordered, the
|
|
||||||
* first message may arrive on the QP before the RTU has arrived.
|
* first message may arrive on the QP before the RTU has arrived.
|
||||||
* This will cause an IBV_EVENT_COMM_EST event to be raised, which
|
* This will cause an IBV_EVENT_COMM_EST event to be raised, which
|
||||||
* would then be picked up by the async event handler in the
|
* would then be picked up by the async event handler in the
|
||||||
@ -387,9 +378,9 @@ typedef struct {
|
|||||||
static OBJ_CLASS_INSTANCE(ibcm_base_cm_id_t, opal_list_item_t, NULL, NULL);
|
static OBJ_CLASS_INSTANCE(ibcm_base_cm_id_t, opal_list_item_t, NULL, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Need to maintain a list of pending CM ID requests (in case we need
|
* Need to maintain a list of pending CM ID requests (for error
|
||||||
* to retransmit). Need to use the struct name here because it was
|
* handling if the requests time out). Need to use the struct name
|
||||||
* forward referenced, above.
|
* here because it was forward referenced, above.
|
||||||
*/
|
*/
|
||||||
typedef struct ibcm_request_t {
|
typedef struct ibcm_request_t {
|
||||||
ibcm_base_cm_id_t super;
|
ibcm_base_cm_id_t super;
|
||||||
@ -412,9 +403,9 @@ static OBJ_CLASS_INSTANCE(ibcm_request_t, ibcm_base_cm_id_t,
|
|||||||
ibcm_request_cm_id_constructor, NULL);
|
ibcm_request_cm_id_constructor, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Need to maintain a list of pending CM ID replies (in case we need
|
* Need to maintain a list of pending CM ID replies (for error
|
||||||
* to retransmit). Need to use a struct name here because it was
|
* handling if the replies time out). Need to use a struct name here
|
||||||
* forward referenced, above.
|
* because it was forward referenced, above.
|
||||||
*/
|
*/
|
||||||
typedef struct ibcm_reply_t {
|
typedef struct ibcm_reply_t {
|
||||||
ibcm_base_cm_id_t super;
|
ibcm_base_cm_id_t super;
|
||||||
@ -1693,9 +1684,9 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
|
|||||||
opal_mutex_unlock(&ie->ie_lock);
|
opal_mutex_unlock(&ie->ie_lock);
|
||||||
|
|
||||||
/* If logic above selected a rejection reason, reject this
|
/* If logic above selected a rejection reason, reject this
|
||||||
request. No need to cache this reject message for
|
request. Note that if the same request arrives again later,
|
||||||
retransmission; if the same request arrives again later, IBCM
|
IBCM will trigger a new event and we'll just reject it
|
||||||
will trigger a new event and we'll just reject it again. */
|
again. */
|
||||||
if (REJ_MAX != rej_reason) {
|
if (REJ_MAX != rej_reason) {
|
||||||
opal_output(-1, "arbitrartion failed -- reject");
|
opal_output(-1, "arbitrartion failed -- reject");
|
||||||
goto reject;
|
goto reject;
|
||||||
@ -1923,9 +1914,7 @@ static int reply_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
|||||||
ie->ie_recv_buffers_posted = true;
|
ie->ie_recv_buffers_posted = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Send the RTU -- note that we don't need to queue it up for
|
/* Send the RTU */
|
||||||
retransmission (see lengthy explanation at beginning of this
|
|
||||||
file). */
|
|
||||||
rtu_data.irtud_reply = reply;
|
rtu_data.irtud_reply = reply;
|
||||||
rtu_data.irtud_qp_index = p->irepd_qp_index;
|
rtu_data.irtud_qp_index = p->irepd_qp_index;
|
||||||
if (0 != ib_cm_send_rtu(event->cm_id, &rtu_data, sizeof(rtu_data))) {
|
if (0 != ib_cm_send_rtu(event->cm_id, &rtu_data, sizeof(rtu_data))) {
|
||||||
@ -1933,8 +1922,8 @@ static int reply_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
|||||||
return OMPI_ERR_IN_ERRNO;
|
return OMPI_ERR_IN_ERRNO;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remove the pending request so that we don't try to retransmit
|
/* Remove the pending request because we won't need to handle
|
||||||
it */
|
errors for it */
|
||||||
opal_output(-1, "reply received cm id %p -- original cached req %p",
|
opal_output(-1, "reply received cm id %p -- original cached req %p",
|
||||||
(void*)cmh->listen_cm_id, (void*)request);
|
(void*)cmh->listen_cm_id, (void*)request);
|
||||||
opal_list_remove_item(&ibcm_pending_requests, &(request->super.super));
|
opal_list_remove_item(&ibcm_pending_requests, &(request->super.super));
|
||||||
@ -1967,21 +1956,18 @@ static int ready_to_use_received(ibcm_listen_cm_id_t *h,
|
|||||||
/* Move the QP to RTS */
|
/* Move the QP to RTS */
|
||||||
if (OMPI_SUCCESS != (rc = qp_to_rts(p->irtud_qp_index,
|
if (OMPI_SUCCESS != (rc = qp_to_rts(p->irtud_qp_index,
|
||||||
event->cm_id, endpoint))) {
|
event->cm_id, endpoint))) {
|
||||||
/* JMS */
|
|
||||||
opal_output(-1, "ib cm rtu handler: failed move to RTS (index %d)",
|
opal_output(-1, "ib cm rtu handler: failed move to RTS (index %d)",
|
||||||
p->irtud_qp_index);
|
p->irtud_qp_index);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remove the pending reply so that we don't try to retransmit
|
/* Remove the pending reply because we won't need to handle errors
|
||||||
it */
|
for it */
|
||||||
opal_output(-1, "RTU received cm id %p -- original cached reply %p",
|
opal_output(-1, "RTU received cm id %p -- original cached reply %p",
|
||||||
(void*)event->cm_id, (void*)reply);
|
(void*)event->cm_id, (void*)reply);
|
||||||
opal_list_remove_item(&ibcm_pending_replies, &(reply->super.super));
|
opal_list_remove_item(&ibcm_pending_replies, &(reply->super.super));
|
||||||
OBJ_RELEASE(reply);
|
OBJ_RELEASE(reply);
|
||||||
|
|
||||||
/* JMS Send a 0 byte message to the other side to ACK this RTU */
|
|
||||||
|
|
||||||
/* Have all the QP's been connected? If so, tell the main BTL
|
/* Have all the QP's been connected? If so, tell the main BTL
|
||||||
that we're done. */
|
that we're done. */
|
||||||
if (0 == --(ie->ie_qps_to_connect)) {
|
if (0 == --(ie->ie_qps_to_connect)) {
|
||||||
@ -2042,8 +2028,8 @@ static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
|||||||
opal_output(-1, "ibcm rej handler: WRONG_DIRECTION unexpected!");
|
opal_output(-1, "ibcm rej handler: WRONG_DIRECTION unexpected!");
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
/* Remove from the global pending_requests list (because
|
/* Remove from the global pending_requests list because we
|
||||||
it no longer needs to be retransmitted upon timeout) */
|
no longer need to handle errors for it */
|
||||||
opal_output(-1, "reply received cm id %p -- original cached req %p",
|
opal_output(-1, "reply received cm id %p -- original cached req %p",
|
||||||
(void*)cmh->listen_cm_id,
|
(void*)cmh->listen_cm_id,
|
||||||
(void*)request);
|
(void*)request);
|
||||||
@ -2077,7 +2063,6 @@ static int reject_received(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
|||||||
|
|
||||||
static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
||||||
{
|
{
|
||||||
int rc;
|
|
||||||
ibcm_request_t *req;
|
ibcm_request_t *req;
|
||||||
opal_output(-1, "ibcm handler: request error!");
|
opal_output(-1, "ibcm handler: request error!");
|
||||||
|
|
||||||
@ -2100,23 +2085,13 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
|||||||
return OMPI_ERR_NOT_FOUND;
|
return OMPI_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = ib_cm_send_req(event->cm_id, &(req->cm_req));
|
/* JMS: need to abort this connection request appropriately */
|
||||||
if (0 != rc) {
|
|
||||||
opal_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
|
|
||||||
"ib_cm function error", true,
|
|
||||||
orte_process_info.nodename,
|
|
||||||
"ib_cm_send_req", rc, strerror(rc));
|
|
||||||
return OMPI_ERR_UNREACH;
|
|
||||||
}
|
|
||||||
opal_output(-1, "Retransmitted IBCM request (CM ID: %p)",
|
|
||||||
(void*)event->cm_id);
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
||||||
{
|
{
|
||||||
int rc;
|
|
||||||
ibcm_reply_t *rep;
|
ibcm_reply_t *rep;
|
||||||
opal_output(-1, "ibcm handler: reply error!");
|
opal_output(-1, "ibcm handler: reply error!");
|
||||||
|
|
||||||
@ -2139,17 +2114,7 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
|
|||||||
return OMPI_ERR_NOT_FOUND;
|
return OMPI_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = ib_cm_send_rep(event->cm_id, &(rep->cm_rep));
|
/* JMS: need to abort this connection request appropriately */
|
||||||
if (0 != rc) {
|
|
||||||
opal_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
|
|
||||||
"ib_cm function error", true,
|
|
||||||
orte_process_info.nodename,
|
|
||||||
"ib_cm_send_rep", rc, strerror(rc));
|
|
||||||
return OMPI_ERR_UNREACH;
|
|
||||||
}
|
|
||||||
opal_output(-1, "Retransmitted IBCM reply (CM ID: %p)",
|
|
||||||
(void*)event->cm_id);
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user