btl/openib: XRC save SRQ#s on the loopback endpoint
This commit fixes a bug that can occur when communicating via XRC with peers on the same node. UDCM was not saving the SRQ numbers on the loopback endpoint (which shares its ib_addr info with all local peers), so any message sent to a local peer used an invalid SRQ number.

Fixes open-mpi/ompi#1383

Signed-off-by: Nathan Hjelm <hjelmn@me.com>
Этот коммит содержится в:
родитель
bfd4254a7b
Коммит
2031bb6f01
@ -551,6 +551,18 @@ static int udcm_endpoint_init_self_xrc (struct mca_btl_base_endpoint_t *lcl_ep)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0 ; i < mca_btl_openib_component.num_xrc_qps ; ++i) {
|
||||||
|
uint32_t srq_num;
|
||||||
|
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
|
||||||
|
if (ibv_get_srq_num(lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq, &srq_num)) {
|
||||||
|
BTL_ERROR(("BTL openib UDCM internal error: can't get srq num"));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
srq_num = lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq->xrc_srq_num;
|
||||||
|
#endif
|
||||||
|
lcl_ep->rem_info.rem_srqs[i].rem_srq_num = srq_num;
|
||||||
|
}
|
||||||
|
|
||||||
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
|
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
|
||||||
recv_qpn = lcl_ep->xrc_recv_qp->qp_num;
|
recv_qpn = lcl_ep->xrc_recv_qp->qp_num;
|
||||||
#else
|
#else
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user