Add ability for user to empty the CUDA IPC registration cache when it is full
Этот коммит содержится в:
родитель
8b88ea9b13
Коммит
7da614c75e
@@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
@@ -41,6 +41,7 @@ struct mca_mpool_rgpusm_component_t {
|
||||
bool print_stats;
|
||||
int leave_pinned;
|
||||
int output;
|
||||
bool empty_cache;
|
||||
};
|
||||
typedef struct mca_mpool_rgpusm_component_t mca_mpool_rgpusm_component_t;
|
||||
|
||||
|
@@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
@@ -127,6 +127,15 @@ static int rgpusm_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&opal_mpool_rgpusm_verbose);
|
||||
|
||||
/* Force emptying of entire registration cache when it gets full */
|
||||
mca_mpool_rgpusm_component.empty_cache = false;
|
||||
(void) mca_base_component_var_register(&mca_mpool_rgpusm_component.super.mpool_version,
|
||||
"empty_cache", "When set, empty entire registration cache when it is full",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_mpool_rgpusm_component.empty_cache);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012-2014 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
@@ -406,12 +406,35 @@ int mca_mpool_rgpusm_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
|
||||
opal_output_verbose(80, mca_mpool_rgpusm_component.output,
|
||||
"RGPUSM: About to insert in rgpusm cache addr=%p, size=%d", addr, (int)size);
|
||||
while((rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg,
|
||||
mca_mpool_rgpusm_component.rcache_size_limit)) ==
|
||||
OPAL_ERR_TEMP_OUT_OF_RESOURCE) {
|
||||
opal_output(-1, "No room in the cache - boot one out");
|
||||
if (!mca_mpool_rgpusm_deregister_lru(mpool)) {
|
||||
break;
|
||||
rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg,
|
||||
mca_mpool_rgpusm_component.rcache_size_limit);
|
||||
if (OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc) {
|
||||
opal_output_verbose(40, mca_mpool_rgpusm_component.output,
|
||||
"RGPUSM: No room in the cache - boot the first one out");
|
||||
(void)mca_mpool_rgpusm_deregister_lru(mpool);
|
||||
if (mca_mpool_rgpusm_component.empty_cache) {
|
||||
int remNum = 1;
|
||||
/* Empty out every registration from LRU until it is empty */
|
||||
opal_output_verbose(40, mca_mpool_rgpusm_component.output,
|
||||
"RGPUSM: About to delete all the unused entries in the cache");
|
||||
while (mca_mpool_rgpusm_deregister_lru(mpool)) {
|
||||
remNum++;
|
||||
}
|
||||
opal_output_verbose(40, mca_mpool_rgpusm_component.output,
|
||||
"RGPUSM: Deleted and deregistered %d entries", remNum);
|
||||
rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg,
|
||||
mca_mpool_rgpusm_component.rcache_size_limit);
|
||||
} else {
|
||||
/* Check for room after one removal. If not, remove another one until there is space */
|
||||
while((rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg,
|
||||
mca_mpool_rgpusm_component.rcache_size_limit)) ==
|
||||
OPAL_ERR_TEMP_OUT_OF_RESOURCE) {
|
||||
opal_output_verbose(40, mca_mpool_rgpusm_component.output,
|
||||
"RGPUSM: No room in the cache - boot one out");
|
||||
if (!mca_mpool_rgpusm_deregister_lru(mpool)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user