btl/ugni: limit the number of memory registrations held per process

  What the patch does
    * adds reg_max / reg_count to mca_btl_ugni_module_t and a max_mem_reg
      MCA parameter (0 - autoselect, -1 - unlimited);
    * ugni_reg_mem refuses a registration once reg_count reaches reg_max,
      ugni_dereg_mem decrements the count;
    * add_procs counts node-local procs, skips only the calling process
      itself, and defers mpool setup until the registration limit is known;
    * changes defaults: cq_size 25000 -> 40000, get_limit 512k -> 4M,
      rdma_max_retries 8 -> 16; clamps fma_limit to 64k; lowers
      btl_eager_limit to smsg_max_data when it equals the SMSG limit.

  Review changes relative to the submitted patch
    * mca_btl_ugni_module_set_max_reg: with max_mem_reg == 0, reg_max was
      never assigned when GNI_GetJobResInfo() did not return GNI_RC_SUCCESS.
      The 1200 / nlocal_procs estimate is now assigned first and overridden
      only by a successful query.
    * mca_btl_ugni_module_set_max_reg: nlocal_procs is clamped to >= 1
      before it is used as a divisor.
    * The last hunk of btl_ugni_add_procs.c grew from +291,29 to +291,44;
      the post-image hash on that file's index line predates this change.

  NOTE(review): line breaks, indentation and blank context lines were
  rebuilt from a whitespace-collapsed copy of this patch (hunk line counts
  were re-checked, exact whitespace was not available).  The names of the
  five system headers removed from btl_ugni_add_procs.c are missing from
  that copy; restore them before applying.

diff --git a/ompi/mca/btl/ugni/btl_ugni.h b/ompi/mca/btl/ugni/btl_ugni.h
index d5348fccf7..38097f5220 100644
--- a/ompi/mca/btl/ugni/btl_ugni.h
+++ b/ompi/mca/btl/ugni/btl_ugni.h
@@ -81,6 +81,9 @@ typedef struct mca_btl_ugni_module_t {
     /* fragment buffer (for message if lookup) */
     opal_hash_table_t pending_smsg_frags;
     int32_t next_frag_id;
+
+    uint32_t reg_max;
+    uint32_t reg_count;
 } mca_btl_ugni_module_t;
 
 typedef struct mca_btl_ugni_component_t {
@@ -123,6 +126,9 @@ typedef struct mca_btl_ugni_component_t {
     int smsg_max_credits;
     /* mailbox size (computed) */
     int smsg_mbox_size;
+
+    /* Maximum number of memory registrations per process */
+    int max_mem_reg;
 } mca_btl_ugni_component_t;
 
 int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
diff --git a/ompi/mca/btl/ugni/btl_ugni_add_procs.c b/ompi/mca/btl/ugni/btl_ugni_add_procs.c
index 2bb058890c..8647fbc06c 100644
--- a/ompi/mca/btl/ugni/btl_ugni_add_procs.c
+++ b/ompi/mca/btl/ugni/btl_ugni_add_procs.c
@@ -10,23 +10,15 @@
  * $HEADER$
  */
 
-#include
-#include
-#include
-#include
-#include
-
-#include "ompi/constants.h"
-#include "ompi/communicator/communicator.h"
-
 #include "ompi_config.h"
 
 #include "btl_ugni.h"
 #include "btl_ugni_frag.h"
-#include "btl_ugni_smsg.h"
 
 static int
 mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module);
+static void
+mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs);
 
 int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
                            size_t nprocs,
@@ -34,33 +26,30 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
                            struct mca_btl_base_endpoint_t **peers,
                            opal_bitmap_t *reachable) {
     mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
-    size_t ntotal_procs;
-    size_t i;
+    size_t ntotal_procs, nlocal_procs, i;
+    bool first_time_init = (NULL == ugni_module->endpoints);
     int rc;
 
     if (NULL == ugni_module->endpoints) {
         (void) ompi_proc_world (&ntotal_procs);
 
         ugni_module->endpoints = calloc (ntotal_procs, sizeof (mca_btl_base_endpoint_t *));
-
         if (OPAL_UNLIKELY(NULL == ugni_module->endpoints)) {
             return OMPI_ERR_OUT_OF_RESOURCE;
         }
-
-        rc = mca_btl_ugni_setup_mpools (ugni_module);
-        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-            BTL_ERROR(("btl/ugni error setting up mpools/free lists"));
-            return rc;
-        }
     }
 
-    for (i = 0 ; i < nprocs ; ++i) {
+    for (i = 0, nlocal_procs = 0 ; i < nprocs ; ++i) {
         struct ompi_proc_t *ompi_proc = procs[i];
         uint32_t rem_rank = ompi_proc->proc_name.vpid;
 
         if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) {
-            /* ignore local procs */
-            peers[i] = NULL;
+            nlocal_procs++;
+        }
+
+        if (OPAL_EQUAL == orte_util_compare_name_fields
+            (ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, &ompi_proc->proc_name)) {
+            /* ignore self */
             continue;
         }
 
@@ -78,6 +67,16 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
         ugni_module->endpoints[rem_rank] = peers[i];
     }
 
+    if (first_time_init) {
+        mca_btl_ugni_module_set_max_reg (ugni_module, nlocal_procs);
+
+        rc = mca_btl_ugni_setup_mpools (ugni_module);
+        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
+            BTL_ERROR(("btl/ugni error setting up mpools/free lists"));
+            return rc;
+        }
+    }
+
     ugni_module->endpoint_count += nprocs;
 
     return OMPI_SUCCESS;
@@ -113,19 +112,25 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
     return OMPI_SUCCESS;
 }
 
-static inline int ugni_reg_mem (mca_btl_ugni_module_t *btl, void *base,
+static inline int ugni_reg_mem (mca_btl_ugni_module_t *ugni_module, void *base,
                                 size_t size, mca_mpool_base_registration_t *reg,
                                 gni_cq_handle_t cq, uint32_t flags)
 {
     mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
     gni_return_t rc;
+
+    if (ugni_module->reg_count >= ugni_module->reg_max) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
 
-    rc = GNI_MemRegister (btl->device->dev_handle, (uint64_t) base,
+    rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
                           size, cq, flags, -1, &(ugni_reg->memory_hdl));
     if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
         return OMPI_ERR_OUT_OF_RESOURCE;
     }
 
+    ugni_module->reg_count++;
+
     return OMPI_SUCCESS;
 }
 
@@ -149,15 +154,17 @@ static int ugni_reg_smsg_mem (void *reg_data, void *base, size_t size,
 
 static int ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
 {
-    mca_btl_ugni_module_t *btl = (mca_btl_ugni_module_t *) reg_data;
+    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
     mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg;
     gni_return_t rc;
 
-    rc = GNI_MemDeregister (btl->device->dev_handle, &ugni_reg->memory_hdl);
+    rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->memory_hdl);
     if (GNI_RC_SUCCESS != rc) {
         return OMPI_ERROR;
     }
 
+    ugni_module->reg_count--;
+
     return OMPI_SUCCESS;
 }
 
@@ -284,3 +291,44 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
     return OMPI_SUCCESS;
 }
 
+/*
+ * Set the per-process cap on memory registrations (reg_max) and reset
+ * the running registration count (reg_count).
+ *
+ * max_mem_reg ==  0: divide the node-wide budget by nlocal_procs
+ * max_mem_reg == -1: INT_MAX
+ * anything else:     use the configured value as given
+ */
+static void
+mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs)
+{
+    if (0 == mca_btl_ugni_component.max_mem_reg) {
+        /* nlocal_procs is a count over the caller's proc list; never divide by 0 */
+        if (nlocal_procs < 1) {
+            nlocal_procs = 1;
+        }
+
+        /* budget used when the registration limit can not be determined */
+        ugni_module->reg_max = 1200 / nlocal_procs;
+
+#if defined(HAVE_GNI_GETJOBRESINFO)
+        {
+            gni_job_res_desc_t res_des;
+            gni_return_t grc;
+
+            grc = GNI_GetJobResInfo (ugni_module->device->dev_id, ompi_common_ugni_module.ptag,
+                                     GNI_JOB_RES_MDD, &res_des);
+            if (GNI_RC_SUCCESS == grc) {
+                ugni_module->reg_max = (res_des.limit - res_des.used) / nlocal_procs;
+            }
+        }
+#endif
+    } else if (-1 == mca_btl_ugni_component.max_mem_reg) {
+        ugni_module->reg_max = INT_MAX;
+    } else {
+        ugni_module->reg_max = mca_btl_ugni_component.max_mem_reg;
+    }
+
+    ugni_module->reg_count = 0;
+}
+
diff --git a/ompi/mca/btl/ugni/btl_ugni_component.c b/ompi/mca/btl/ugni/btl_ugni_component.c
index 802ea893bd..f7db8063c9 100644
--- a/ompi/mca/btl/ugni/btl_ugni_component.c
+++ b/ompi/mca/btl/ugni/btl_ugni_component.c
@@ -78,14 +78,15 @@ btl_ugni_component_register(void)
         mca_btl_ugni_param_register_int("eager_inc", NULL, 16);
 
     mca_btl_ugni_component.cq_size =
-        mca_btl_ugni_param_register_int("cq_size", NULL, 25000);
+        mca_btl_ugni_param_register_int("cq_size", NULL, 40000);
 
     /* SMSG limit. 0 - autoselect */
     mca_btl_ugni_component.ugni_smsg_limit =
         mca_btl_ugni_param_register_int("smsg_limit", "Maximum size message that "
                                         "will be sent using the SMSG/MSGQ protocol "
                                         "(0 - autoselect(default), 16k max)", 0);
-    if (mca_btl_ugni_component.ugni_smsg_limit > 16384) {
+
+    if (16384 < mca_btl_ugni_component.ugni_smsg_limit) {
         mca_btl_ugni_component.ugni_smsg_limit = 16384;
     }
 
@@ -97,16 +98,26 @@ btl_ugni_component_register(void)
     mca_btl_ugni_component.ugni_fma_limit =
         mca_btl_ugni_param_register_int("fma_limit", "Maximum size message that "
                                         "will be sent using the FMA (Fast Memory "
-                                        "Access) protocol (default 1024)",
+                                        "Access) protocol (default 1024, 64k max)",
                                         1024);
 
+    if (65536 < mca_btl_ugni_component.ugni_fma_limit) {
+        mca_btl_ugni_component.ugni_fma_limit = 65536;
+    }
+
     mca_btl_ugni_component.ugni_get_limit =
         mca_btl_ugni_param_register_int("get_limit", "Maximum size message that "
-                                        "will be sent using the get protocol "
-                                        "(default 512k)", 512 * 1024);
+                                        "will be sent using a get protocol "
+                                        "(default 4M)", 4 * 1024 * 1024);
 
     mca_btl_ugni_component.rdma_max_retries =
-        mca_btl_ugni_param_register_int("rdma_max_retries", NULL, 8);
+        mca_btl_ugni_param_register_int("rdma_max_retries", NULL, 16);
+
+    mca_btl_ugni_component.max_mem_reg =
+        mca_btl_ugni_param_register_int("max_mem_reg", "Maximum number of "
+                                        "memory registrations a process can "
+                                        "hold (0 - autoselect, -1 - unlimited)"
+                                        " (default 0)", 0);
 
     mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
 
@@ -129,6 +140,7 @@ btl_ugni_component_register(void)
     /* Call the BTL based to register its MCA params */
     mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
                                 &mca_btl_ugni_module.super);
+
     return OMPI_SUCCESS;
 }
 
@@ -274,7 +286,10 @@ mca_btl_ugni_component_init (int *num_btl_modules,
     mca_btl_ugni_component.smsg_max_data = mca_btl_ugni_component.ugni_smsg_limit -
         sizeof (mca_btl_ugni_send_frag_hdr_t);
 
-    /* module settings */
+    if (mca_btl_ugni_component.ugni_smsg_limit == mca_btl_ugni_module.super.btl_eager_limit) {
+        mca_btl_ugni_module.super.btl_eager_limit = mca_btl_ugni_component.smsg_max_data;
+    }
+
     mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;
 
     rc = mca_btl_ugni_smsg_setup ();