From 95e637f5baa39383efc168c193760dc88c587db6 Mon Sep 17 00:00:00 2001 From: Mike Dubman Date: Wed, 14 May 2014 13:45:16 +0000 Subject: [PATCH] OSHMEM: fix error message when aborting on OOM fixed by Roman, reviewed by Miked cmr=v1.8.2:reviewer=ompi-rm1.8 This commit was SVN r31752. --- oshmem/mca/memheap/base/Makefile.am | 2 +- .../{help-shmem-mca.txt => help-oshmem-memheap.txt} | 0 oshmem/mca/memheap/base/memheap_base_select.c | 2 +- oshmem/mca/spml/ikrit/Makefile.am | 2 +- ...shmem-spml-ikrit.txt => help-oshmem-spml-ikrit.txt} | 0 oshmem/mca/spml/ikrit/spml_ikrit.c | 2 +- oshmem/mca/spml/ikrit/spml_ikrit_component.c | 10 +++++----- oshmem/mca/spml/yoda/Makefile.am | 2 +- ...p-shmem-spml-yoda.txt => help-oshmem-spml-yoda.txt} | 8 ++++++++ oshmem/mca/spml/yoda/spml_yoda.c | 7 ++++++- 10 files changed, 24 insertions(+), 11 deletions(-) rename oshmem/mca/memheap/base/{help-shmem-mca.txt => help-oshmem-memheap.txt} (100%) rename oshmem/mca/spml/ikrit/{help-shmem-spml-ikrit.txt => help-oshmem-spml-ikrit.txt} (100%) rename oshmem/mca/spml/yoda/{help-shmem-spml-yoda.txt => help-oshmem-spml-yoda.txt} (73%) diff --git a/oshmem/mca/memheap/base/Makefile.am b/oshmem/mca/memheap/base/Makefile.am index 4d9162449d..ef4c25b7ef 100644 --- a/oshmem/mca/memheap/base/Makefile.am +++ b/oshmem/mca/memheap/base/Makefile.am @@ -7,7 +7,7 @@ # $HEADER$ # -dist_oshmemdata_DATA += base/help-shmem-mca.txt +dist_oshmemdata_DATA += base/help-oshmem-memheap.txt headers += \ base/base.h diff --git a/oshmem/mca/memheap/base/help-shmem-mca.txt b/oshmem/mca/memheap/base/help-oshmem-memheap.txt similarity index 100% rename from oshmem/mca/memheap/base/help-shmem-mca.txt rename to oshmem/mca/memheap/base/help-oshmem-memheap.txt diff --git a/oshmem/mca/memheap/base/memheap_base_select.c b/oshmem/mca/memheap/base/memheap_base_select.c index 506a8f8a9f..89cb9990d3 100644 --- a/oshmem/mca/memheap/base/memheap_base_select.c +++ b/oshmem/mca/memheap/base/memheap_base_select.c @@ -150,7 +150,7 @@ int mca_memheap_base_select() /* Verify that some module was initialized */ if (NULL == mca_memheap_base_module_initialized) { - opal_show_help("help-shmem-mca.txt", + opal_show_help("help-oshmem-memheap.txt", "find-available:none-found", true, "memheap"); diff --git a/oshmem/mca/spml/ikrit/Makefile.am b/oshmem/mca/spml/ikrit/Makefile.am index b87bdecf08..c256c03b04 100644 --- a/oshmem/mca/spml/ikrit/Makefile.am +++ b/oshmem/mca/spml/ikrit/Makefile.am @@ -10,7 +10,7 @@ # dist_oshmemdata_DATA = \ - help-shmem-spml-ikrit.txt + help-oshmem-spml-ikrit.txt AM_CPPFLAGS = $(spml_ikrit_CPPFLAGS) diff --git a/oshmem/mca/spml/ikrit/help-shmem-spml-ikrit.txt b/oshmem/mca/spml/ikrit/help-oshmem-spml-ikrit.txt similarity index 100% rename from oshmem/mca/spml/ikrit/help-shmem-spml-ikrit.txt rename to oshmem/mca/spml/ikrit/help-oshmem-spml-ikrit.txt diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.c b/oshmem/mca/spml/ikrit/spml_ikrit.c index 9e4a329be9..56d796dcf7 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit.c @@ -413,7 +413,7 @@ int mca_spml_ikrit_add_procs(oshmem_proc_t** procs, size_t nprocs) #else err = mxm_ep_get_address(mca_spml_ikrit.mxm_ep, &my_ep_info.addr.ep_addr, &mxm_addr_len); if (MXM_OK != err) { - orte_show_help("help-shmem-spml-ikrit.txt", "unable to get endpoint address", true, + orte_show_help("help-oshmem-spml-ikrit.txt", "unable to get endpoint address", true, mxm_error_string(err)); return OSHMEM_ERROR; } diff --git a/oshmem/mca/spml/ikrit/spml_ikrit_component.c b/oshmem/mca/spml/ikrit/spml_ikrit_component.c index 305eb39513..cd81358027 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit_component.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit_component.c @@ -74,7 +74,7 @@ static int check_mxm_tls(char *var) "%s=%s", var, getenv(var) )) { - orte_show_help("help-shmem-spml-ikrit.txt", "mxm tls", true, + orte_show_help("help-oshmem-spml-ikrit.txt", "mxm tls", true, str); free(str); } @@ -190,7 +190,7 @@ int spml_ikrit_progress(void) err = mxm_progress(mca_spml_ikrit.mxm_context); if ((MXM_OK != err) && (MXM_ERR_NO_PROGRESS != err)) { - orte_show_help("help-shmem-spml-ikrit.txt", + orte_show_help("help-oshmem-spml-ikrit.txt", "errors during mxm_progress", true, mxm_error_string(err)); @@ -240,7 +240,7 @@ static int mca_spml_ikrit_component_open(void) SPML_VERBOSE(1, "No supported device found, disqualifying spml/ikrit"); } else { - orte_show_help("help-shmem-spml-ikrit.txt", + orte_show_help("help-oshmem-spml-ikrit.txt", "mxm init", true, mxm_error_string(err)); @@ -252,7 +252,7 @@ static int mca_spml_ikrit_component_open(void) MXM_SHMEM_MQ_ID, &mca_spml_ikrit.mxm_mq); if (MXM_OK != err) { - orte_show_help("help-shmem-spml-ikrit.txt", + orte_show_help("help-oshmem-spml-ikrit.txt", "mxm mq create", true, mxm_error_string(err)); @@ -295,7 +295,7 @@ static int spml_ikrit_mxm_init(void) mca_spml_ikrit.mxm_ep_opts, &mca_spml_ikrit.mxm_ep); if (MXM_OK != err) { - orte_show_help("help-shmem-spml-ikrit.txt", + orte_show_help("help-oshmem-spml-ikrit.txt", "unable to create endpoint", true, mxm_error_string(err)); diff --git a/oshmem/mca/spml/yoda/Makefile.am b/oshmem/mca/spml/yoda/Makefile.am index 5ac94c527b..06fa18a7f2 100644 --- a/oshmem/mca/spml/yoda/Makefile.am +++ b/oshmem/mca/spml/yoda/Makefile.am @@ -10,7 +10,7 @@ # dist_oshmemdata_DATA = \ - help-shmem-spml-yoda.txt + help-oshmem-spml-yoda.txt EXTRA_DIST = post_configure.sh diff --git a/oshmem/mca/spml/yoda/help-shmem-spml-yoda.txt b/oshmem/mca/spml/yoda/help-oshmem-spml-yoda.txt similarity index 73% rename from oshmem/mca/spml/yoda/help-shmem-spml-yoda.txt rename to oshmem/mca/spml/yoda/help-oshmem-spml-yoda.txt index e57f6efdf7..a56e30c058 100644 --- a/oshmem/mca/spml/yoda/help-shmem-spml-yoda.txt +++ b/oshmem/mca/spml/yoda/help-oshmem-spml-yoda.txt @@ -19,3 +19,11 @@ name and its corresponding minimum value is shown below. BTL eager limit value: %d (set via btl_%s_eager_limit) BTL eager limit minimum: %d MCA parameter name: btl_%s_eager_limit + +[internal_oom_error] +'%s' operation failed. Unable to allocate buffer, need %d bytes. +Try increasing 'spml_yoda_bml_alloc_threshold' value or setting it to '0' to +force waiting for all puts completion. + + spml_yoda_bml_alloc_threshold: %d + diff --git a/oshmem/mca/spml/yoda/spml_yoda.c b/oshmem/mca/spml/yoda/spml_yoda.c index 4fd7d5bde4..bad3658289 100644 --- a/oshmem/mca/spml/yoda/spml_yoda.c +++ b/oshmem/mca/spml/yoda/spml_yoda.c @@ -10,6 +10,7 @@ #include "oshmem_config.h" +#include "opal/util/show_help.h" #include "orte/include/orte/types.h" #include "orte/runtime/orte_globals.h" @@ -823,9 +824,13 @@ static inline int mca_spml_yoda_put_internal(void *dst_addr, put_via_send); if (OPAL_UNLIKELY(!des || !des->des_src )) { - SPML_ERROR("shmem OOM error need %d bytes", ncopied); SPML_ERROR("src=%p nfrags = %d frag_size=%d", src_addr, nfrags, frag_size); + SPML_ERROR("shmem OOM error need %d bytes", ncopied); + opal_show_help("help-oshmem-spml-yoda.txt", + "internal_oom_error", + true, + "Put", ncopied, mca_spml_yoda.bml_alloc_threshold); oshmem_shmem_abort(-1); }