From cd48eccbaea06ea80eecccc006999c88a9a275be Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Fri, 24 Nov 2017 08:11:18 -0700 Subject: [PATCH] mtl/ofi: fix problem with mprobe/mrecv At least with some providers (sockets and GNI), the mprobe/mrecv ofi mtl methods were incorrect. For these two providers, at least, the mrecv must supply the original tag and mask bits that were passed to the earlier FI_PEEK | FI_CLAIM request that probed for the message. These providers take a strict interpretation of the following sentence from the libfabric fi_tagged man page: ``` Claimed messages can only be retrieved using a subsequent, paired receive operation with the FI_CLAIM flag set. ``` Signed-off-by: Howard Pritchard --- ompi/mca/mtl/ofi/mtl_ofi.h | 7 +++++-- ompi/mca/mtl/ofi/mtl_ofi_request.h | 7 ++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ompi/mca/mtl/ofi/mtl_ofi.h b/ompi/mca/mtl/ofi/mtl_ofi.h index 0ee125c796..391e321013 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.h +++ b/ompi/mca/mtl/ofi/mtl_ofi.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2013-2017 Intel, Inc. All rights reserved + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* * $COPYRIGHT$ * @@ -686,8 +688,8 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl, msg.desc = NULL; msg.iov_count = 1; msg.addr = 0; - msg.tag = 0; - msg.ignore = 0; + msg.tag = ofi_req->match_bits; + msg.ignore = ofi_req->mask_bits; msg.context = (void *)&ofi_req->ctx; msg.data = 0; @@ -868,6 +870,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl, ofi_req->error_callback = ompi_mtl_ofi_probe_error_callback; ofi_req->completion_count = 1; ofi_req->match_state = 0; + ofi_req->mask_bits = mask_bits; MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags)); if (-FI_ENOMSG == ret) { diff --git a/ompi/mca/mtl/ofi/mtl_ofi_request.h b/ompi/mca/mtl/ofi/mtl_ofi_request.h index 5e2faad645..15bbd2b014 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_request.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_request.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2013-2016 Intel, Inc. All rights reserved + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * * $COPYRIGHT$ * @@ -71,9 +73,12 @@ struct ompi_mtl_ofi_request_t { /** Flag to prevent MPI_Cancel from cancelling a started Recv request */ volatile bool req_started; - /** Request's tag used in case of an error. */ + /** Request's tag used in case of an error. Also for FI_CLAIM requests. */ uint64_t match_bits; + /** Used to build msg for fi_trecvmsg with FI_CLAIM */ + uint64_t mask_bits; + /** Remote OFI address used when a Recv needs to be ACKed */ fi_addr_t remote_addr;