diff --git a/opal/mca/mpool/rgpusm/mpool_rgpusm.h b/opal/mca/mpool/rgpusm/mpool_rgpusm.h index bcca8942ad..9a89e1b780 100644 --- a/opal/mca/mpool/rgpusm/mpool_rgpusm.h +++ b/opal/mca/mpool/rgpusm/mpool_rgpusm.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2012 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * @@ -41,6 +41,7 @@ struct mca_mpool_rgpusm_component_t { bool print_stats; int leave_pinned; int output; + bool empty_cache; }; typedef struct mca_mpool_rgpusm_component_t mca_mpool_rgpusm_component_t; diff --git a/opal/mca/mpool/rgpusm/mpool_rgpusm_component.c b/opal/mca/mpool/rgpusm/mpool_rgpusm_component.c index 20179db463..6b2d2016d7 100644 --- a/opal/mca/mpool/rgpusm/mpool_rgpusm_component.c +++ b/opal/mca/mpool/rgpusm/mpool_rgpusm_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006 Voltaire. All rights reserved. * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * @@ -127,6 +127,15 @@ static int rgpusm_register(void) MCA_BASE_VAR_SCOPE_READONLY, &opal_mpool_rgpusm_verbose); + /* Force emptying of entire registration cache when it gets full */ + mca_mpool_rgpusm_component.empty_cache = false; + (void) mca_base_component_var_register(&mca_mpool_rgpusm_component.super.mpool_version, + "empty_cache", "When set, empty entire registration cache when it is full", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_mpool_rgpusm_component.empty_cache); + return OPAL_SUCCESS; } diff --git a/opal/mca/mpool/rgpusm/mpool_rgpusm_module.c b/opal/mca/mpool/rgpusm/mpool_rgpusm_module.c index 890bedb145..aa96d90672 100644 --- a/opal/mca/mpool/rgpusm/mpool_rgpusm_module.c +++ b/opal/mca/mpool/rgpusm/mpool_rgpusm_module.c @@ -14,7 +14,7 @@ * Copyright (c) 2006 Voltaire. All rights reserved. * Copyright (c) 2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2012-2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * @@ -406,12 +406,35 @@ int mca_mpool_rgpusm_register(mca_mpool_base_module_t *mpool, void *addr, opal_output_verbose(80, mca_mpool_rgpusm_component.output, "RGPUSM: About to insert in rgpusm cache addr=%p, size=%d", addr, (int)size); - while((rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg, - mca_mpool_rgpusm_component.rcache_size_limit)) == - OPAL_ERR_TEMP_OUT_OF_RESOURCE) { - opal_output(-1, "No room in the cache - boot one out"); - if (!mca_mpool_rgpusm_deregister_lru(mpool)) { - break; + rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg, + mca_mpool_rgpusm_component.rcache_size_limit); + if (OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc) { + opal_output_verbose(40, mca_mpool_rgpusm_component.output, + "RGPUSM: No room in the cache - boot the first one out"); + (void)mca_mpool_rgpusm_deregister_lru(mpool); + if (mca_mpool_rgpusm_component.empty_cache) { + int remNum = 1; + /* Empty out every registration from LRU until it is empty */ + opal_output_verbose(40, mca_mpool_rgpusm_component.output, + "RGPUSM: About to delete all the unused entries in the cache"); + while (mca_mpool_rgpusm_deregister_lru(mpool)) { + remNum++; + } + opal_output_verbose(40, mca_mpool_rgpusm_component.output, + "RGPUSM: Deleted and deregistered %d entries", remNum); + rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg, + mca_mpool_rgpusm_component.rcache_size_limit); + } else { + /* Check for room after one removal. If not, remove another one until there is space */ + while((rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg, + mca_mpool_rgpusm_component.rcache_size_limit)) == + OPAL_ERR_TEMP_OUT_OF_RESOURCE) { + opal_output_verbose(40, mca_mpool_rgpusm_component.output, + "RGPUSM: No room in the cache - boot one out"); + if (!mca_mpool_rgpusm_deregister_lru(mpool)) { + break; + } + } } }