1
1

Add ability for user to empty the CUDA IPC registration cache when it is full

Этот коммит содержится в:
Rolf vandeVaart 2015-09-16 09:09:49 -04:00
родитель 8b88ea9b13
Коммит 7da614c75e
3 изменённых файла: 42 добавления и 9 удалений

Просмотреть файл

@@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
*
@@ -41,6 +41,7 @@ struct mca_mpool_rgpusm_component_t {
bool print_stats;
int leave_pinned;
int output;
bool empty_cache;
};
typedef struct mca_mpool_rgpusm_component_t mca_mpool_rgpusm_component_t;

Просмотреть файл

@@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
*
@@ -127,6 +127,15 @@ static int rgpusm_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&opal_mpool_rgpusm_verbose);
/* Force emptying of entire registration cache when it gets full */
mca_mpool_rgpusm_component.empty_cache = false;
(void) mca_base_component_var_register(&mca_mpool_rgpusm_component.super.mpool_version,
"empty_cache", "When set, empty entire registration cache when it is full",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_mpool_rgpusm_component.empty_cache);
return OPAL_SUCCESS;
}

Просмотреть файл

@@ -14,7 +14,7 @@
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
*
@@ -406,12 +406,35 @@ int mca_mpool_rgpusm_register(mca_mpool_base_module_t *mpool, void *addr,
opal_output_verbose(80, mca_mpool_rgpusm_component.output,
"RGPUSM: About to insert in rgpusm cache addr=%p, size=%d", addr, (int)size);
while((rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg,
mca_mpool_rgpusm_component.rcache_size_limit)) ==
OPAL_ERR_TEMP_OUT_OF_RESOURCE) {
opal_output(-1, "No room in the cache - boot one out");
if (!mca_mpool_rgpusm_deregister_lru(mpool)) {
break;
rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg,
mca_mpool_rgpusm_component.rcache_size_limit);
if (OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc) {
opal_output_verbose(40, mca_mpool_rgpusm_component.output,
"RGPUSM: No room in the cache - boot the first one out");
(void)mca_mpool_rgpusm_deregister_lru(mpool);
if (mca_mpool_rgpusm_component.empty_cache) {
int remNum = 1;
/* Empty out every registration from LRU until it is empty */
opal_output_verbose(40, mca_mpool_rgpusm_component.output,
"RGPUSM: About to delete all the unused entries in the cache");
while (mca_mpool_rgpusm_deregister_lru(mpool)) {
remNum++;
}
opal_output_verbose(40, mca_mpool_rgpusm_component.output,
"RGPUSM: Deleted and deregistered %d entries", remNum);
rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg,
mca_mpool_rgpusm_component.rcache_size_limit);
} else {
/* Check for room after one removal. If not, remove another one until there is space */
while((rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg,
mca_mpool_rgpusm_component.rcache_size_limit)) ==
OPAL_ERR_TEMP_OUT_OF_RESOURCE) {
opal_output_verbose(40, mca_mpool_rgpusm_component.output,
"RGPUSM: No room in the cache - boot one out");
if (!mca_mpool_rgpusm_deregister_lru(mpool)) {
break;
}
}
}
}