OSHMEM: Add two new mca variables
Added a use_hp variable to sshmem/sysv to control huge page usage; added a shared_mr variable to sshmem/verbs. Both parameters default to auto. Help messages fixed by Igor; reviewed by @miked-mellanox and @alex-mikheev
Этот коммит содержится в:
родитель
067fa05209
Коммит
d82dc7f67f
@ -13,7 +13,7 @@
|
||||
#
|
||||
[create segment failure]
|
||||
The OpenSHMEM "(%s)" plugin in the "sshmem" framework failed to
|
||||
allocate a shared memory segment via the %s system call. This
|
||||
allocate a shared memory segment via the system call. This
|
||||
usually means that there are not enough resources available to memory subsystem on your server.
|
||||
|
||||
Your OpenSHMEM job will now abort.
|
||||
|
@ -196,8 +196,8 @@ segment_create(map_segment_t *ds_buf,
|
||||
if (MAP_FAILED == addr) {
|
||||
opal_show_help("help-oshmem-sshmem.txt",
|
||||
"create segment failure",
|
||||
"mmap",
|
||||
true,
|
||||
"mmap",
|
||||
orte_process_info.nodename, (unsigned long long) size,
|
||||
strerror(errno), errno);
|
||||
opal_show_help("help-oshmem-sshmem-mmap.txt",
|
||||
|
@ -13,6 +13,8 @@
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "opal/util/sys_limits.h"
|
||||
|
||||
#include "oshmem/mca/sshmem/sshmem.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
@ -36,6 +38,36 @@ typedef struct mca_sshmem_sysv_module_t {
|
||||
} mca_sshmem_sysv_module_t;
|
||||
extern mca_sshmem_sysv_module_t mca_sshmem_sysv_module;
|
||||
|
||||
/**
 * Get the current huge page size in bytes.
 *
 * The value is taken from the "Hugepagesize:" entry of /proc/meminfo the
 * first time the function is called and then cached in a function-local
 * static. When the file cannot be opened, no matching entry is found, or
 * the entry is not a usable positive value, 2 MiB is used instead.
 *
 * The cache is a plain static with no synchronization.
 *
 * @return huge page size in bytes; never 0.
 */
static inline size_t oshmem_gethugepagesize(void)
{
    static size_t huge_page_size = 0;
    /* Largest kB count whose byte value still fits in size_t. */
    const size_t max_kb = ((size_t) -1) / 1024;
    char buf[256];
    int size_kb;
    FILE *f;

    /* Fast path: value already determined by an earlier call. */
    if (huge_page_size != 0) {
        return huge_page_size;
    }

    f = fopen("/proc/meminfo", "r");
    if (f != NULL) {
        while (fgets(buf, sizeof(buf), f)) {
            if (sscanf(buf, "Hugepagesize: %d kB", &size_kb) == 1) {
                /*
                 * Reject zero/negative values and values that would wrap
                 * when converted to bytes; the default below is used then.
                 */
                if (size_kb > 0 && (size_t) size_kb <= max_kb) {
                    huge_page_size = (size_t) size_kb * 1024;
                }
                break;
            }
        }
        fclose(f);
    }

    /* Fall back to 2 MiB when nothing usable was read. */
    if (huge_page_size == 0) {
        huge_page_size = (size_t) 2 * 1024 * 1024;
    }

    return huge_page_size;
}
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_SSHMEM_SYSV_EXPORT_H */
|
||||
|
@ -111,15 +111,30 @@ sysv_runtime_query(mca_base_module_t **module,
|
||||
/* if we are here, then let the run-time test games begin */
|
||||
|
||||
#if defined (SHM_HUGETLB)
|
||||
mca_sshmem_sysv_component.use_hp = 1;
|
||||
flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR | SHM_HUGETLB;
|
||||
if (-1 == (shmid = shmget(IPC_PRIVATE, (size_t)(opal_getpagesize()), flags))) {
|
||||
mca_sshmem_sysv_component.use_hp = 0;
|
||||
if (mca_sshmem_sysv_component.use_hp > 0) {
|
||||
flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR | SHM_HUGETLB;
|
||||
if (-1 == (shmid = shmget(IPC_PRIVATE, oshmem_gethugepagesize(), flags))) {
|
||||
if (mca_sshmem_sysv_component.use_hp == 1) {
|
||||
mca_sshmem_sysv_component.use_hp = 0;
|
||||
goto out;
|
||||
}
|
||||
mca_sshmem_sysv_component.use_hp = 0;
|
||||
}
|
||||
else if ((void *)-1 == (addr = shmat(shmid, NULL, 0))) {
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
if (mca_sshmem_sysv_component.use_hp == 1) {
|
||||
mca_sshmem_sysv_component.use_hp = 0;
|
||||
goto out;
|
||||
}
|
||||
mca_sshmem_sysv_component.use_hp = 0;
|
||||
}
|
||||
}
|
||||
else if ((void *)-1 == (addr = shmat(shmid, NULL, 0))) {
|
||||
shmctl(shmid, IPC_RMID, NULL );
|
||||
#else
|
||||
if (mca_sshmem_sysv_component.use_hp == 1) {
|
||||
mca_sshmem_sysv_component.use_hp = 0;
|
||||
goto out;
|
||||
}
|
||||
mca_sshmem_sysv_component.use_hp = 0;
|
||||
#endif
|
||||
|
||||
if (0 == mca_sshmem_sysv_component.use_hp) {
|
||||
@ -128,7 +143,7 @@ sysv_runtime_query(mca_base_module_t **module,
|
||||
goto out;
|
||||
}
|
||||
else if ((void *)-1 == (addr = shmat(shmid, NULL, 0))) {
|
||||
shmctl(shmid, IPC_RMID, NULL );
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@ -171,7 +186,14 @@ sysv_register(void)
|
||||
MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||||
&mca_sshmem_sysv_component.priority);
|
||||
|
||||
mca_sshmem_sysv_component.use_hp = 0;
|
||||
mca_sshmem_sysv_component.use_hp = 2;
|
||||
mca_base_component_var_register (&mca_sshmem_sysv_component.super.base_version,
|
||||
"use_hp", "Huge pages usage "
|
||||
"[0 - off, 1 - on, 2 - auto] (default: 2)", MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||||
&mca_sshmem_sysv_component.use_hp);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
@ -189,6 +189,7 @@ segment_create(map_segment_t *ds_buf,
|
||||
flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;
|
||||
#if defined (SHM_HUGETLB)
|
||||
flags |= (mca_sshmem_sysv_component.use_hp ? SHM_HUGETLB : 0);
|
||||
size = ((size + oshmem_gethugepagesize() - 1) / oshmem_gethugepagesize()) * oshmem_gethugepagesize();
|
||||
#endif
|
||||
|
||||
/* Create a new shared memory segment and save the shmid. */
|
||||
@ -206,7 +207,7 @@ segment_create(map_segment_t *ds_buf,
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* Attach to the sement */
|
||||
/* Attach to the segment */
|
||||
addr = shmat(shmid, (void *) mca_sshmem_base_start_address, 0);
|
||||
if (addr == (void *) -1L) {
|
||||
opal_show_help("help-oshmem-sshmem.txt",
|
||||
|
@ -176,7 +176,7 @@ verbs_runtime_query(mca_base_module_t **module,
|
||||
}
|
||||
|
||||
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
|
||||
if (!rc) {
|
||||
if (!rc && mca_sshmem_verbs_component.has_shared_mr > 0) {
|
||||
struct ibv_exp_reg_shared_mr_in in_smr;
|
||||
|
||||
access_flag = IBV_ACCESS_LOCAL_WRITE |
|
||||
@ -188,13 +188,19 @@ verbs_runtime_query(mca_base_module_t **module,
|
||||
mca_sshmem_verbs_fill_shared_mr(&in_smr, device->ib_pd, device->ib_mr_shared->handle, addr, access_flag);
|
||||
ib_mr = ibv_exp_reg_shared_mr(&in_smr);
|
||||
if (NULL == ib_mr) {
|
||||
if (mca_sshmem_verbs_component.has_shared_mr == 1)
|
||||
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
mca_sshmem_verbs_component.has_shared_mr = 0;
|
||||
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
} else {
|
||||
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
|
||||
mca_sshmem_verbs_component.has_shared_mr = 1;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (!rc && mca_sshmem_verbs_component.has_shared_mr == 1) {
|
||||
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
mca_sshmem_verbs_component.has_shared_mr = 0;
|
||||
#endif /* MPAGE_ENABLE */
|
||||
}
|
||||
|
||||
@ -285,6 +291,15 @@ verbs_register(void)
|
||||
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
|
||||
}
|
||||
|
||||
mca_sshmem_verbs_component.has_shared_mr = 2;
|
||||
index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
|
||||
"shared_mr", "Shared memory region usage "
|
||||
"[0 - off, 1 - on, 2 - auto] (default: 2)", MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||||
&mca_sshmem_verbs_component.has_shared_mr);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user