From 2031bb6f01a3d78f83759db7c6260997916c44eb Mon Sep 17 00:00:00 2001
From: Nathan Hjelm
Date: Thu, 18 Feb 2016 20:55:48 -0700
Subject: [PATCH] btl/openib: XRC save SRQ#s on the loopback endpoint

This commit fixes a bug that can occur when communicating via XRC to
peers on the same node. UDCM was not saving the SRQ numbers on the
loopback endpoint (which shares its ib_addr info with all local peers)
so any messages to local peers use an invalid SRQ number.

Fixes open-mpi/ompi#1383

Signed-off-by: Nathan Hjelm
---
 .../mca/btl/openib/connect/btl_openib_connect_udcm.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c
index 245fca5621..2dd5caead0 100644
--- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c
+++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c
@@ -551,6 +551,18 @@ static int udcm_endpoint_init_self_xrc (struct mca_btl_base_endpoint_t *lcl_ep)
         break;
     }
 
+    for (int i = 0 ; i < mca_btl_openib_component.num_xrc_qps ; ++i) {
+        uint32_t srq_num;
+#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
+        if (ibv_get_srq_num(lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq, &srq_num)) {
+            BTL_ERROR(("BTL openib UDCM internal error: can't get srq num"));
+        }
+#else
+        srq_num = lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq->xrc_srq_num;
+#endif
+        lcl_ep->rem_info.rem_srqs[i].rem_srq_num = srq_num;
+    }
+
 #if OPAL_HAVE_CONNECTX_XRC_DOMAINS
     recv_qpn = lcl_ep->xrc_recv_qp->qp_num;
 #else