From 5cf43de44538b818b014bdad0490439e2d212395 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Thu, 1 Nov 2018 14:03:00 -0700 Subject: [PATCH] MTL/OFI: Check threshold number of peers allowed per rank When the provider does not support FI_REMOTE_CQ_DATA, the OFI tag does not have sizeof(int) bits for the rank. Therefore, unexpected behavior will occur when this limit is crossed. Check the max allowed number of ranks during add_procs() and return if there is danger of exceeding this threshold. Signed-off-by: Aravind Gopalakrishnan --- ompi/mca/mtl/ofi/mtl_ofi.c | 16 ++++++++++++++++ ompi/mca/mtl/ofi/mtl_ofi_component.c | 4 +++- ompi/mca/mtl/ofi/mtl_ofi_endpoint.h | 8 +++++++- ompi/mca/mtl/ofi/mtl_ofi_types.h | 3 ++- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/ompi/mca/mtl/ofi/mtl_ofi.c b/ompi/mca/mtl/ofi/mtl_ofi.c index 7e19f170e6..b7f0b019b6 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.c +++ b/ompi/mca/mtl/ofi/mtl_ofi.c @@ -54,9 +54,22 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, char *ep_names = NULL; fi_addr_t *fi_addrs = NULL; mca_mtl_ofi_endpoint_t *endpoint = NULL; + int num_peers_limit = (1 << ompi_mtl_ofi.num_bits_source_rank) - 1; namelen = ompi_mtl_ofi.epnamelen; + /* We cannot add more ranks than available tag bits */ + if ((false == ompi_mtl_ofi.fi_cq_data) && + OPAL_UNLIKELY(((int) (nprocs + ompi_mtl_ofi.num_peers) > num_peers_limit))) { + opal_output(0, "%s:%d: OFI provider: %s does not have enough bits for source rank in its tag.\n" + "Adding more ranks will result in undefined behaviour. Please enable\n" + "FI_REMOTE_CQ_DATA feature in the provider. For more info refer fi_cq(3).\n", + __FILE__, __LINE__, ompi_mtl_ofi.provider_name); + fflush(stderr); + ret = OMPI_ERROR; + goto bail; + } + /** * Create array of EP names. */ @@ -126,6 +139,9 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint; } + /* Update global counter of number of procs added to this rank */ + ompi_mtl_ofi.num_peers += nprocs; + ret = OMPI_SUCCESS; bail: diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index 90931fb135..41aa0fd344 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -15,7 +15,6 @@ #include "mtl_ofi.h" #include "opal/util/argv.h" -#include "opal/util/show_help.h" #include "opal/util/printf.h" static int ompi_mtl_ofi_component_open(void); @@ -607,6 +606,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, /* Update the maximum supported Communicator ID */ ompi_mtl_ofi.base.mtl_max_contextid = (int)((1ULL << ofi_tag_bits_for_cid) - 1); + ompi_mtl_ofi.num_peers = 0; /** * Open fabric @@ -740,6 +740,8 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, goto error; } + ompi_mtl_ofi.provider_name = strdup(prov->fabric_attr->prov_name); + /** * Free providers info since it's not needed anymore. */ diff --git a/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h b/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h index 788d091916..11003e675b 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h @@ -41,7 +41,13 @@ typedef struct mca_mtl_ofi_endpoint_t mca_mtl_ofi_endpoint_t; static inline mca_mtl_ofi_endpoint_t *ompi_mtl_ofi_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc) { if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) { - ompi_mtl_ofi_add_procs(mtl, 1, &ompi_proc); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_mtl_ofi_add_procs(mtl, 1, &ompi_proc))) { + /* Fatal error. exit() out */ + opal_output(0, "%s:%d: *** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n", + __FILE__, __LINE__); + fflush(stderr); + exit(1); + } } return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; diff --git a/ompi/mca/mtl/ofi/mtl_ofi_types.h b/ompi/mca/mtl/ofi/mtl_ofi_types.h index af578295fa..2bee022aee 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_types.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_types.h @@ -43,7 +43,7 @@ typedef struct mca_mtl_ofi_module_t { /** "Any source" address */ fi_addr_t any_addr; - /** Optional user-specified OFI provider name */ + /** OFI provider name */ char *provider_name; /** Maximum inject size */ @@ -61,6 +61,7 @@ typedef struct mca_mtl_ofi_module_t { unsigned long long source_rank_mask; unsigned long long mpi_tag_mask; int num_bits_mpi_tag; + int num_peers; /** Synchronous protocol tag bits */ unsigned long long sync_send;