
Adding error handling in OpenIB BTL

bugfix: major: openib send credits returned correctly after a fault for pending frags to dead processes; also tweak the default IB retry timeouts to make this happen faster
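
The credit return described above is the core of the fix; here is a minimal sketch of the accounting with made-up toy types (only the "1+n" credit math mirrors the diff below), not the openib component code:

/* Hedged sketch with toy types (not the openib component code): on a failed
 * send completion, reclaim the failed fragment's WQE plus the 'n' WQEs of
 * fragments posted before it, and hand them back as SRQ send credits so
 * queued traffic can make progress again instead of waiting on IB retries. */
#include <stdio.h>

typedef struct {
    int sd_credits;    /* SRQ send credits currently available */
    int posted_wqes;   /* work-queue entries still outstanding */
} toy_srq_qp_t;

/* stand-in for qp_frag_to_wqe(): reclaim WQEs posted before the failed frag */
static int reclaim_earlier_wqes(toy_srq_qp_t *qp, int n)
{
    qp->posted_wqes -= n;
    return n;
}

static void on_failed_send(toy_srq_qp_t *qp, int frags_before_failure)
{
    int n = reclaim_earlier_wqes(qp, frags_before_failure);
    qp->posted_wqes -= 1;        /* the failed fragment's own WQE */
    qp->sd_credits  += 1 + n;    /* mirrors the "1+n" credit return in the diff */
    printf("returned %d credits, %d now available\n", 1 + n, qp->sd_credits);
}

int main(void)
{
    toy_srq_qp_t qp = { .sd_credits = 0, .posted_wqes = 4 };
    on_failed_send(&qp, 3);      /* 3 frags were posted before the failed one */
    return 0;
}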

Make it compile in non-debug builds

Mark the IB endpoint as failed when invoking an error; this resolves UDCM connection deadlocks
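
A minimal sketch of why the ordering matters for the UDCM case, with toy names (only endpoint_state and MCA_BTL_IB_FAILED appear in the diff below); this illustrates the idea, not the UDCM code:

/* Hedged sketch with toy names: mark the endpoint as failed *before*
 * reporting the fatal error, so connection-progress checks see a terminal
 * state instead of waiting forever on a dead endpoint. */
#include <stdio.h>

enum toy_ep_state { TOY_EP_CONNECTING, TOY_EP_CONNECTED, TOY_EP_FAILED };

struct toy_endpoint {
    enum toy_ep_state state;
};

static void toy_report_fatal(struct toy_endpoint *ep)
{
    (void)ep;                    /* upper-layer error callback would run here */
}

static void toy_invoke_error(struct toy_endpoint *ep)
{
    ep->state = TOY_EP_FAILED;   /* flag first ... */
    toy_report_fatal(ep);        /* ... then notify the upper layers */
}

/* a connection-progress test that can now terminate on a failed endpoint */
static int toy_connection_settled(const struct toy_endpoint *ep)
{
    return ep->state == TOY_EP_CONNECTED || ep->state == TOY_EP_FAILED;
}

int main(void)
{
    struct toy_endpoint ep = { .state = TOY_EP_CONNECTING };
    toy_invoke_error(&ep);
    printf("settled: %d\n", toy_connection_settled(&ep));
    return 0;
}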

Changing the default IB retry timeouts is not a good idea.
We'll need to find another way to speed up credit recovery in failure cases.
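
For reference, these are the knobs that paragraph is about; a hedged libibverbs sketch using standard verbs calls with typical default values, not the openib BTL's own connection code or MCA parameters:

/* Hedged sketch: the QP attributes set during the RTR-to-RTS transition
 * decide how long a send to an unreachable peer is retried before the QP
 * reports an error. Lowering timeout/retry_cnt makes failures (and credit
 * recovery) faster, at the cost of spurious errors on a congested fabric --
 * the trade-off the commit message decides against. */
#include <infiniband/verbs.h>

static int toy_move_qp_to_rts(struct ibv_qp *qp)
{
    struct ibv_qp_attr attr = {
        .qp_state      = IBV_QPS_RTS,
        .timeout       = 14,   /* local ACK timeout: 4.096us * 2^14, ~67 ms */
        .retry_cnt     = 7,    /* transport retries before reporting an error */
        .rnr_retry     = 7,    /* receiver-not-ready retries (7 = infinite) */
        .sq_psn        = 0,
        .max_rd_atomic = 1,
    };
    return ibv_modify_qp(qp, &attr,
                         IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
                         IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC);
}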

Remove ULFM-specific cases

Signed-off-by: Aurelien Bouteiller <bouteill@icl.utk.edu>
This commit is contained in:
Aurélien Bouteiller 2016-02-22 11:16:43 -05:00, committed by Aurelien Bouteiller
parent 1b96be5f2f
commit e46c907468
No key found matching this signature
GPG key ID: 08F60797C5941DB2
2 changed files with 45 additions and 10 deletions

View file

@@ -3,7 +3,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -3667,7 +3667,7 @@ error:
#endif
if(IBV_WC_WR_FLUSH_ERR != wc->status || !flush_err_printed[cq]++) {
BTL_PEER_ERROR(remote_proc, ("error polling %s with status %s "
BTL_PEER_ERROR(remote_proc, ("error polling %s with status %s"
"status number %d for wr_id %" PRIx64 " opcode %d vendor error %d qp_idx %d",
cq_name[cq], btl_openib_component_status_to_string(wc->status),
wc->status, wc->wr_id,
@@ -3708,9 +3708,36 @@ error:
}
}
if(openib_btl)
if(openib_btl) {
/* return send wqe */
qp_put_wqe(endpoint, qp);
/* return wqes that were sent before this frag */
n = qp_frag_to_wqe(endpoint, qp, to_com_frag(des));
/* force emptying the pending frags toward the dead endpoint
* in progress_pending_frags* below */
endpoint->endpoint_state = MCA_BTL_IB_FAILED;
if(IBV_WC_SEND == wc->opcode && !BTL_OPENIB_QP_TYPE_PP(qp)) {
BTL_VERBOSE(("frag %p returning %d credits", frag, 1+n));
OPAL_THREAD_FETCH_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n);
/* new SRQ credit available. Try to progress pending frags*/
progress_pending_frags_srq(openib_btl, qp);
}
/* new wqe or/and get token available. Try to progress pending frags */
progress_pending_frags_wqe(endpoint, qp);
mca_btl_openib_frag_progress_pending_put_get(endpoint, qp);
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
des->des_cbfunc(&openib_btl->super, endpoint, des, wc->status);
}
if (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
mca_btl_openib_free(&openib_btl->super, des);
}
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL,
(struct opal_proc_t*)remote_proc, NULL);
}
}
static int poll_device(mca_btl_openib_device_t* device, int count)

View file

@@ -502,6 +502,7 @@ void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
mca_btl_openib_frag_t *openib_frag;
mca_btl_openib_com_frag_t *com_frag;
mca_btl_openib_control_header_t *ctl_hdr;
int rc;
OPAL_OUTPUT((-1, "SENDING CTS to %s on qp index %d (QP num %d)",
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
@@ -538,11 +539,14 @@ void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
ctl_hdr->type = MCA_BTL_OPENIB_CONTROL_CTS;
/* Send the fragment */
if (OPAL_SUCCESS != mca_btl_openib_endpoint_post_send(endpoint, sc_frag)) {
BTL_ERROR(("Failed to post CTS send"));
mca_btl_openib_endpoint_invoke_error(endpoint);
if (OPAL_SUCCESS != (rc = mca_btl_openib_endpoint_post_send(endpoint, sc_frag))) {
if( OPAL_ERR_RESOURCE_BUSY != rc ) {
BTL_ERROR(("Failed to post CTS send"));
mca_btl_openib_endpoint_invoke_error(endpoint);
}
} else {
endpoint->endpoint_cts_sent = true;
}
endpoint->endpoint_cts_sent = true;
}
/*
@@ -611,8 +615,8 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
mca_btl_openib_send_frag_t *frag;
mca_btl_openib_endpoint_t *ep;
bool master = false;
int rc;
opal_output(-1, "Now we are CONNECTED");
if (MCA_BTL_XRC_ENABLED) {
opal_mutex_lock (&endpoint->ib_addr->addr_lock);
if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) {
@@ -664,8 +668,11 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
frag = to_send_frag(frag_item);
/* We need to post this one */
if (OPAL_ERROR == mca_btl_openib_endpoint_post_send(endpoint, frag)) {
BTL_ERROR(("Error posting send"));
if(OPAL_SUCCESS != (rc = mca_btl_openib_endpoint_post_send(endpoint, frag))) {
/* if we are out of resources, let's try to reschedule everything later */
if( OPAL_ERR_RESOURCE_BUSY != rc ) {
BTL_ERROR(("Error posting send"));
}
}
}
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
@@ -1030,6 +1037,7 @@ void *mca_btl_openib_endpoint_invoke_error(void *context)
}
} else {
btl = endpoint->endpoint_btl;
endpoint->endpoint_state = MCA_BTL_IB_FAILED;
}
/* If we didn't find a BTL, then just bail :-( */