diff --git a/ompi/mca/btl/openib/btl_openib.h b/ompi/mca/btl/openib/btl_openib.h
index 30fcc59f49..974e11e130 100644
--- a/ompi/mca/btl/openib/btl_openib.h
+++ b/ompi/mca/btl/openib/btl_openib.h
@@ -303,6 +303,8 @@ struct mca_btl_openib_component_t {
     int gid_index;
     /** Whether we want a dynamically resizing srq, enabled by default */
     bool enable_srq_resize;
+    /** opal_output stream id used for the verbose memory registration messages */
+    int memory_registration_verbose;
 #if BTL_OPENIB_FAILOVER_ENABLED
     int verbose_failover;
 #endif
diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c
index f0e78fae03..605dce98ff 100644
--- a/ompi/mca/btl/openib/btl_openib_component.c
+++ b/ompi/mca/btl/openib/btl_openib_component.c
@@ -598,6 +598,12 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
         return OMPI_ERR_OUT_OF_RESOURCE;
     }
 
+    /* Trace the registered range; size is unsigned long so >= 2 GiB is not truncated */
+    OPAL_OUTPUT_VERBOSE((30, mca_btl_openib_component.memory_registration_verbose,
+                         "openib_reg_mr: base=%p, bound=%p, size=%lu",
+                         (void *) reg->base, (void *) reg->bound,
+                         (unsigned long) (reg->bound - reg->base + 1)));
+
 #if OMPI_CUDA_SUPPORT
     if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) {
         mca_common_cuda_register(base, size,
@@ -613,6 +619,12 @@ static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
     mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data;
     mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg;
 
+    /* Trace the range before it is deregistered; size is unsigned long so >= 2 GiB is not truncated */
+    OPAL_OUTPUT_VERBOSE((30, mca_btl_openib_component.memory_registration_verbose,
+                         "openib_dereg_mr: base=%p, bound=%p, size=%lu",
+                         (void *) reg->base, (void *) reg->bound,
+                         (unsigned long) (reg->bound - reg->base + 1)));
+
     if(openib_reg->mr != NULL) {
         if(ibv_dereg_mr(openib_reg->mr)) {
             BTL_ERROR(("%s: error unpinning openib memory errno says %s",
diff --git a/ompi/mca/btl/openib/btl_openib_mca.c b/ompi/mca/btl/openib/btl_openib_mca.c
index bbfbcf5c63..8aeca3602f 100644
--- a/ompi/mca/btl/openib/btl_openib_mca.c
+++ b/ompi/mca/btl/openib/btl_openib_mca.c
@@ -220,7 +220,7 @@ int btl_openib_register_mca_params(void)
     char default_qps[100];
     uint32_t mid_qp_size;
     char *msg, *str;
-    int ret, tmp;
+    int ret, tmp, tmp1;
 
     ret = OMPI_SUCCESS;
 #define CHECK(expr) do {\
@@ -522,6 +522,15 @@ int btl_openib_register_mca_params(void)
                   "Maximum size (in bytes) of a single fragment of a long message when using the RDMA protocols (must be > 0 and <= hw capabilities).",
                   0, &mca_btl_openib_component.max_hw_msg_size, 0));
 
+    /* Help debug memory registration issues.  The registration and
+       deregistration traces are issued at verbosity level 30 on a
+       dedicated output stream whose verbosity is this parameter. */
+    CHECK(reg_int("memory_registration_verbose", NULL,
+                  "Output some verbose memory registration information "
+                  "(0 = no output, 30 or higher = output)", 0, &tmp1, 0));
+    mca_btl_openib_component.memory_registration_verbose = opal_output_open(NULL);
+    opal_output_set_verbosity(mca_btl_openib_component.memory_registration_verbose, tmp1);
+
     /* Info only */
     tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
                                           "have_fork_support",